1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
60 */
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/proc.h>
67 #include <sys/buf.h>
68 #include <sys/vnode.h>
69 #include <sys/mount.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 #include <sys/ubc.h>
73
74 #include <sys/vm.h>
75 #include <sys/vmparam.h>
76
77 #include <sys/time.h>
78 #include <kern/clock.h>
79
80 #include <nfs/rpcv2.h>
81 #include <nfs/nfsproto.h>
82 #include <nfs/nfs.h>
83 #include <nfs/nfsmount.h>
84 #include <nfs/nqnfs.h>
85 #include <nfs/nfsnode.h>
86
87 #include <sys/kdebug.h>
88
89 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
90 struct proc *p, int operation));
91 static struct buf *nfs_getwriteblk __P((struct vnode *vp, daddr_t bn,
92 int size, struct proc *p,
93 struct ucred *cred, int off, int len));
94
95 extern int nfs_numasync;
96 extern struct nfsstats nfsstats;
97
98 /*
99 * Vnode op for read using bio
100 * Any similarity to readip() is purely coincidental
101 */
102 int
103 nfs_bioread(vp, uio, ioflag, cred, getpages)
104 register struct vnode *vp;
105 register struct uio *uio;
106 int ioflag;
107 struct ucred *cred;
108 int getpages;
109 {
110 register struct nfsnode *np = VTONFS(vp);
111 register int biosize, diff, i;
112 struct buf *bp = 0, *rabp;
113 struct vattr vattr;
114 struct proc *p;
115 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
116 daddr_t lbn, rabn;
117 int bufsize;
118 int nra, error = 0, n = 0, on = 0, not_readin;
119 int operation = (getpages? BLK_PAGEIN : BLK_READ);
120
121 #if DIAGNOSTIC
122 if (uio->uio_rw != UIO_READ)
123 panic("nfs_read mode");
124 #endif
125 if (uio->uio_resid == 0)
126 return (0);
127 if (uio->uio_offset < 0)
128 return (EINVAL);
129 p = uio->uio_procp;
130 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
131 (void)nfs_fsinfo(nmp, vp, cred, p);
132 	/* due to getblk/VM interactions, use values no larger than the VM page size */
133 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
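	/*
	 * Illustrative sketch (assumed values): with an f_iosize of 8192 and
	 * a 4K PAGE_SIZE, biosize is clamped to 4096, so each logical cache
	 * block in this routine covers exactly one VM page.
	 */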
134 /*
135 * For nfs, cache consistency can only be maintained approximately.
136 * Although RFC1094 does not specify the criteria, the following is
137 * believed to be compatible with the reference port.
138 * For nqnfs, full cache consistency is maintained within the loop.
139 * For nfs:
140 * If the file's modify time on the server has changed since the
141 * last read rpc or you have written to the file,
142 * you may have lost data cache consistency with the
143 * server, so flush all of the file's data out of the cache.
144 * Then force a getattr rpc to ensure that you have up to date
145 * attributes.
146 * NB: This implies that cache data can be read when up to
147 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
148 * attributes this could be forced by setting n_attrstamp to 0 before
149 * the VOP_GETATTR() call.
150 */
151 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
152 if (np->n_flag & NMODIFIED) {
153 if (vp->v_type != VREG) {
154 if (vp->v_type != VDIR)
155 panic("nfs: bioread, not dir");
156 nfs_invaldir(vp);
157 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
158 if (error)
159 return (error);
160 }
161 np->n_attrstamp = 0;
162 error = VOP_GETATTR(vp, &vattr, cred, p);
163 if (error)
164 return (error);
165 np->n_mtime = vattr.va_mtime.tv_sec;
166 } else {
167 error = VOP_GETATTR(vp, &vattr, cred, p);
168 if (error)
169 return (error);
170 if (np->n_mtime != vattr.va_mtime.tv_sec) {
171 if (vp->v_type == VDIR)
172 nfs_invaldir(vp);
173 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
174 if (error)
175 return (error);
176 np->n_mtime = vattr.va_mtime.tv_sec;
177 }
178 }
179 }
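	/*
	 * Illustrative scenario: if another client updates the file, the
	 * VOP_GETATTR() above observes a new va_mtime, the stale buffers are
	 * purged via nfs_vinvalbuf(), and the data is re-read from the
	 * server on the next pass through the loop below.
	 */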
180 do {
181
182 /*
183 * Get a valid lease. If cached data is stale, flush it.
184 */
185 if (nmp->nm_flag & NFSMNT_NQNFS) {
186 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
187 do {
188 error = nqnfs_getlease(vp, ND_READ, cred, p);
189 } while (error == NQNFS_EXPIRED);
190 if (error)
191 return (error);
192 if (np->n_lrev != np->n_brev ||
193 (np->n_flag & NQNFSNONCACHE) ||
194 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
195 if (vp->v_type == VDIR)
196 nfs_invaldir(vp);
197 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
198 if (error)
199 return (error);
200 np->n_brev = np->n_lrev;
201 }
202 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
203 nfs_invaldir(vp);
204 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
205 if (error)
206 return (error);
207 }
208 }
209 if (np->n_flag & NQNFSNONCACHE) {
210 switch (vp->v_type) {
211 case VREG:
212 return (nfs_readrpc(vp, uio, cred));
213 case VLNK:
214 return (nfs_readlinkrpc(vp, uio, cred));
215 case VDIR:
216 break;
217 default:
218 printf(" NQNFSNONCACHE: type %x unexpected\n",
219 vp->v_type);
220 			}
221 }
222 switch (vp->v_type) {
223 case VREG:
224 nfsstats.biocache_reads++;
225 lbn = uio->uio_offset / biosize;
226 on = uio->uio_offset & (biosize - 1);
227 not_readin = 1;
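		/*
		 * Worked example (illustrative values): with biosize 4096 and
		 * uio_offset 10000, lbn = 10000 / 4096 = 2 and
		 * on = 10000 & 4095 = 1808.  biosize is a power of two, so the
		 * mask is equivalent to uio_offset % biosize.
		 */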
228
229 /*
230 * Start the read ahead(s), as required.
231 */
232 if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
233 for (nra = 0; nra < nmp->nm_readahead &&
234 (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
235 rabn = lbn + 1 + nra;
236 if (!incore(vp, rabn)) {
237 rabp = nfs_getcacheblk(vp, rabn, biosize, p, operation);
238 if (!rabp)
239 return (EINTR);
240 if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
241 SET(rabp->b_flags, (B_READ | B_ASYNC));
242 if (nfs_asyncio(rabp, cred)) {
243 SET(rabp->b_flags, (B_INVAL|B_ERROR));
244 rabp->b_error = EIO;
245 brelse(rabp);
246 }
247 } else
248 brelse(rabp);
249 }
250 }
251 }
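		/*
		 * E.g. (illustrative values): with nm_readahead 4, lbn 2,
		 * biosize 4096 and n_size 32768, async reads are issued for
		 * blocks 3..6 (each satisfies (lbn + 1 + nra) * biosize <
		 * n_size), skipping any block already incore.
		 */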
252
253 /*
254 * If the block is in the cache and has the required data
255 * in a valid region, just copy it out.
256 * Otherwise, get the block and write back/read in,
257 * as required.
258 */
259 again:
260 bufsize = biosize;
261 if ((off_t)(lbn + 1) * biosize > np->n_size &&
262 (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
263 bufsize = np->n_size - lbn * biosize;
264 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
265 }
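		/*
		 * E.g. (illustrative values): biosize 4096, n_size 10000,
		 * lbn 2: the block would end at 12288 > 10000, so bufsize
		 * becomes 10000 - 8192 = 1808, rounded up to the next
		 * DEV_BSIZE (512) multiple, i.e. 2048.
		 */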
266 bp = nfs_getcacheblk(vp, lbn, bufsize, p, operation);
267 if (!bp)
268 return (EINTR);
269
270 if (!ISSET(bp->b_flags, B_CACHE)) {
271 SET(bp->b_flags, B_READ);
272 CLR(bp->b_flags, (B_DONE | B_ERROR | B_INVAL));
273 not_readin = 0;
274 error = nfs_doio(bp, cred, p);
275 if (error) {
276 brelse(bp);
277 return (error);
278 }
279 }
280 if (bufsize > on) {
281 n = min((unsigned)(bufsize - on), uio->uio_resid);
282 } else {
283 n = 0;
284 }
285 diff = np->n_size - uio->uio_offset;
286 if (diff < n)
287 n = diff;
288 if (not_readin && n > 0) {
289 if (on < bp->b_validoff || (on + n) > bp->b_validend) {
290 SET(bp->b_flags, (B_NOCACHE|B_INVAFTERWRITE));
291 if (bp->b_dirtyend > 0) {
292 if (!ISSET(bp->b_flags, B_DELWRI))
293 panic("nfsbioread");
294 if (VOP_BWRITE(bp) == EINTR)
295 return (EINTR);
296 } else
297 brelse(bp);
298 goto again;
299 }
300 }
301 vp->v_lastr = lbn;
302 diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
303 if (diff < n)
304 n = diff;
305 break;
306 case VLNK:
307 nfsstats.biocache_readlinks++;
308 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p, operation);
309 if (!bp)
310 return (EINTR);
311 if (!ISSET(bp->b_flags, B_CACHE)) {
312 SET(bp->b_flags, B_READ);
313 error = nfs_doio(bp, cred, p);
314 if (error) {
315 SET(bp->b_flags, B_ERROR);
316 brelse(bp);
317 return (error);
318 }
319 }
320 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
321 on = 0;
322 break;
323 case VDIR:
324 nfsstats.biocache_readdirs++;
325 if (np->n_direofoffset
326 && uio->uio_offset >= np->n_direofoffset) {
327 return (0);
328 }
329 lbn = uio->uio_offset / NFS_DIRBLKSIZ;
330 on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
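	    /*
	     * E.g. (illustrative, assuming NFS_DIRBLKSIZ is 4096):
	     * uio_offset 5000 yields lbn 1 and on 904, the same
	     * power-of-two split used for regular files above.
	     */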
331 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p, operation);
332 if (!bp)
333 return (EINTR);
334 if (!ISSET(bp->b_flags, B_CACHE)) {
335 SET(bp->b_flags, B_READ);
336 error = nfs_doio(bp, cred, p);
337 if (error) {
338 brelse(bp);
339 while (error == NFSERR_BAD_COOKIE) {
340 nfs_invaldir(vp);
341 error = nfs_vinvalbuf(vp, 0, cred, p, 1);
342 /*
343 * Yuck! The directory has been modified on the
344 * server. The only way to get the block is by
345 * reading from the beginning to get all the
346 * offset cookies.
347 */
348 for (i = 0; i <= lbn && !error; i++) {
349 if (np->n_direofoffset
350 && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
351 return (0);
352 bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p, operation);
353 if (!bp)
354 return (EINTR);
355 if (!ISSET(bp->b_flags, B_DONE)) {
356 SET(bp->b_flags, B_READ);
357 error = nfs_doio(bp, cred, p);
358 if (error) {
359 brelse(bp);
360 } else if (i < lbn)
361 brelse(bp);
362 }
363 }
364 }
365 if (error)
366 return (error);
367 }
368 }
369
370 /*
371 * If not eof and read aheads are enabled, start one.
372 * (You need the current block first, so that you have the
373 * directory offset cookie of the next block.)
374 */
375 if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
376 (np->n_direofoffset == 0 ||
377 (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
378 !(np->n_flag & NQNFSNONCACHE) &&
379 !incore(vp, lbn + 1)) {
380 rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p, operation);
381 if (rabp) {
382 if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
383 SET(rabp->b_flags, (B_READ | B_ASYNC));
384 if (nfs_asyncio(rabp, cred)) {
385 SET(rabp->b_flags, (B_INVAL|B_ERROR));
386 rabp->b_error = EIO;
387 brelse(rabp);
388 }
389 } else {
390 brelse(rabp);
391 }
392 }
393 }
394 /*
395 * Make sure we use a signed variant of min() since
396 * the second term may be negative.
397 */
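	    /*
	     * E.g. (illustrative): after a short directory read,
	     * b_resid + on can exceed NFS_DIRBLKSIZ, making the second
	     * term negative; lmin() then leaves n <= 0, whereas an
	     * unsigned min() would wrap it to a huge positive count.
	     */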
398 n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
399 break;
400 default:
401 	printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
402 	break;
403 	}
404
405 if (n > 0) {
406 error = uiomove(bp->b_data + on, (int)n, uio);
407 }
408 switch (vp->v_type) {
409 case VREG:
410 break;
411 case VLNK:
412 n = 0;
413 break;
414 case VDIR:
415 if (np->n_flag & NQNFSNONCACHE)
416 SET(bp->b_flags, B_INVAL);
417 break;
418 default:
419 	printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
420 }
421 brelse(bp);
422 } while (error == 0 && uio->uio_resid > 0 && n > 0);
423 return (error);
424 }
425
426 /*
427 * Vnode op for write using bio
428 */
429 int
430 nfs_write(ap)
431 struct vop_write_args /* {
432 struct vnode *a_vp;
433 struct uio *a_uio;
434 int a_ioflag;
435 struct ucred *a_cred;
436 } */ *ap;
437 {
438 register int biosize;
439 register struct uio *uio = ap->a_uio;
440 struct proc *p = uio->uio_procp;
441 register struct vnode *vp = ap->a_vp;
442 struct nfsnode *np = VTONFS(vp);
443 register struct ucred *cred = ap->a_cred;
444 int ioflag = ap->a_ioflag;
445 struct buf *bp;
446 struct vattr vattr;
447 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
448 daddr_t lbn;
449 int bufsize;
450 int n, on, error = 0, iomode, must_commit;
451
452 #if DIAGNOSTIC
453 if (uio->uio_rw != UIO_WRITE)
454 panic("nfs_write mode");
455 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc())
456 panic("nfs_write proc");
457 #endif
458 if (vp->v_type != VREG)
459 return (EIO);
460 if (np->n_flag & NWRITEERR) {
461 np->n_flag &= ~NWRITEERR;
462 return (np->n_error);
463 }
464 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
465 (void)nfs_fsinfo(nmp, vp, cred, p);
466 if (ioflag & (IO_APPEND | IO_SYNC)) {
467 if (np->n_flag & NMODIFIED) {
468 np->n_attrstamp = 0;
469 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
470 if (error)
471 return (error);
472 }
473 if (ioflag & IO_APPEND) {
474 np->n_attrstamp = 0;
475 error = VOP_GETATTR(vp, &vattr, cred, p);
476 if (error)
477 return (error);
478 uio->uio_offset = np->n_size;
479 }
480 }
481 if (uio->uio_offset < 0)
482 return (EINVAL);
483 if (uio->uio_resid == 0)
484 return (0);
485 /*
486 * Maybe this should be above the vnode op call, but so long as
487 	 * file servers have no limits, I don't think it matters.
488 */
489 if (p && uio->uio_offset + uio->uio_resid >
490 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
491 psignal(p, SIGXFSZ);
492 return (EFBIG);
493 }
494 /*
495 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
496 * will be the same size within a filesystem. nfs_writerpc will
497 	 * still use nm_wsize when sizing the RPCs.
498 */
499 	/* due to getblk/VM interactions, use values no larger than the VM page size */
500 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
501
502 do {
503 /*
504 * Check for a valid write lease.
505 */
506 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
507 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
508 do {
509 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
510 } while (error == NQNFS_EXPIRED);
511 if (error)
512 return (error);
513 if (np->n_lrev != np->n_brev ||
514 (np->n_flag & NQNFSNONCACHE)) {
515 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
516 if (error)
517 return (error);
518 np->n_brev = np->n_lrev;
519 }
520 }
521 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
522 iomode = NFSV3WRITE_FILESYNC;
523 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
524 if (must_commit)
525 nfs_clearcommit(vp->v_mount);
526 return (error);
527 }
528 nfsstats.biocache_writes++;
529 lbn = uio->uio_offset / biosize;
530 on = uio->uio_offset & (biosize-1);
531 n = min((unsigned)(biosize - on), uio->uio_resid);
532 again:
533 if (uio->uio_offset + n > np->n_size) {
534 np->n_size = uio->uio_offset + n;
535 np->n_flag |= NMODIFIED;
536 if (UBCISVALID(vp))
537 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
538 }
539 bufsize = biosize;
540 #if 0
541 /* (removed for UBC) */
542 if ((lbn + 1) * biosize > np->n_size) {
543 bufsize = np->n_size - lbn * biosize;
544 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
545 }
546 #endif
547 bp = nfs_getwriteblk(vp, lbn, bufsize, p, cred, on, n);
548 if (!bp)
549 return (EINTR);
550 if (ISSET(bp->b_flags, B_ERROR)) {
551 error = bp->b_error;
552 brelse(bp);
553 return (error);
554 }
555 if (bp->b_wcred == NOCRED) {
556 /*
557 * NFS has embedded ucred.
558 			 * Cannot crhold() here, as that causes zone corruption.
559 */
560 bp->b_wcred = crdup(cred);
561 }
562 np->n_flag |= NMODIFIED;
563
564 /*
565 * Check for valid write lease and get one as required.
566 * In case getblk() and/or bwrite() delayed us.
567 */
568 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
569 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
570 do {
571 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
572 } while (error == NQNFS_EXPIRED);
573 if (error) {
574 brelse(bp);
575 return (error);
576 }
577 if (np->n_lrev != np->n_brev ||
578 (np->n_flag & NQNFSNONCACHE)) {
579 brelse(bp);
580 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
581 if (error)
582 return (error);
583 np->n_brev = np->n_lrev;
584 goto again;
585 }
586 }
587 error = uiomove((char *)bp->b_data + on, n, uio);
588 if (error) {
589 SET(bp->b_flags, B_ERROR);
590 brelse(bp);
591 return (error);
592 }
593 if (bp->b_dirtyend > 0) {
594 bp->b_dirtyoff = min(on, bp->b_dirtyoff);
595 bp->b_dirtyend = max((on + n), bp->b_dirtyend);
596 } else {
597 bp->b_dirtyoff = on;
598 bp->b_dirtyend = on + n;
599 }
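		/*
		 * E.g. (illustrative values): a fresh buffer written at on 512
		 * for n 1024 gets b_dirtyoff 512 and b_dirtyend 1536; a second
		 * write at on 1536 for n 512 extends b_dirtyend to 2048 while
		 * b_dirtyoff stays 512.
		 */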
600 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
601 bp->b_validoff > bp->b_dirtyend) {
602 bp->b_validoff = bp->b_dirtyoff;
603 bp->b_validend = bp->b_dirtyend;
604 } else {
605 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
606 bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
607 }
608
609 /*
610 * Since this block is being modified, it must be written
611 * again and not just committed.
612 */
613 CLR(bp->b_flags, B_NEEDCOMMIT);
614
615 /*
616 		 * If the lease is non-cacheable or IO_SYNC is set, do a bwrite().
617 */
618 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
619 bp->b_proc = p;
620 error = VOP_BWRITE(bp);
621 if (error)
622 return (error);
623 if (np->n_flag & NQNFSNONCACHE) {
624 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
625 if (error)
626 return (error);
627 }
628 } else if ((n + on) == biosize &&
629 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
630 bp->b_proc = (struct proc *)0;
631 SET(bp->b_flags, B_ASYNC);
632 (void)nfs_writebp(bp, 0);
633 } else
634 bdwrite(bp);
635 } while (uio->uio_resid > 0 && n > 0);
636 return (0);
637 }
638
639 /*
640 * Get a cache block for writing. The range to be written is
641 * (off..off+len) within the block. This routine ensures that the
642  * block either has no dirty region or that the given range is
643 * contiguous with the existing dirty region.
644 */
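/*
 * E.g. (illustrative values): with an existing dirty region [512..1024)
 * and a new write at off 2048, the regions are not contiguous, so the
 * gap between the old valid end and the new write is read in (or zero
 * filled on a short read) before the dirty bounds are extended.
 */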
645 static struct buf *
646 nfs_getwriteblk(vp, bn, size, p, cred, off, len)
647 struct vnode *vp;
648 daddr_t bn;
649 int size;
650 struct proc *p;
651 struct ucred *cred;
652 int off, len;
653 {
654 struct nfsnode *np = VTONFS(vp);
655 struct buf *bp;
656 int error;
657 struct iovec iov;
658 struct uio uio;
659 off_t boff;
660
661 again:
662 bp = nfs_getcacheblk(vp, bn, size, p, BLK_WRITE);
663 if (!bp)
664 return (NULL);
665 if (bp->b_wcred == NOCRED) {
666 /*
667 * NFS has embedded ucred.
668 		 * Cannot crhold() here, as that causes zone corruption.
669 */
670 bp->b_wcred = crdup(cred);
671 }
672
673 if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
674 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
675 }
676
677 /*
678 	 * UBC doesn't (yet) handle partial pages, so nfs_biowrite was
679 	 * hacked never to bdwrite, i.e. to start every little write right
680 	 * away. While running IE, Avie noticed the resulting performance
681 	 * problem; hence this code, which permits those delayed writes by
682 	 * ensuring an initial read of the entire page. The read may hit
683 	 * EOF (a "short read"), but we handle that below.
684 	 *
685 	 * We are quite dependent on the correctness of B_CACHE, so check
686 	 * that first in case of problems.
687 */
688 if (!ISSET(bp->b_flags, B_CACHE) && len < PAGE_SIZE) {
689 struct nfsnode *np = VTONFS(vp);
690
691 boff = (off_t)bp->b_blkno * DEV_BSIZE;
692 uio.uio_iov = &iov;
693 uio.uio_iovcnt = 1;
694 uio.uio_offset = boff;
695 uio.uio_resid = PAGE_SIZE;
696 uio.uio_segflg = UIO_SYSSPACE;
697 uio.uio_rw = UIO_READ;
698 uio.uio_procp = p;
699 iov.iov_base = bp->b_data;
700 iov.iov_len = PAGE_SIZE;
701 error = nfs_readrpc(vp, &uio, cred);
702 if (error) {
703 bp->b_error = error;
704 SET(bp->b_flags, B_ERROR);
705 			printf("nfs_getwriteblk: readrpc returned %d\n", error);
706 }
707 if (uio.uio_resid > 0)
708 bzero(iov.iov_base, uio.uio_resid);
709 bp->b_validoff = 0;
710 bp->b_validend = PAGE_SIZE - uio.uio_resid;
711 if (np->n_size > boff + bp->b_validend)
712 bp->b_validend = min(np->n_size - boff, PAGE_SIZE);
713 bp->b_dirtyoff = 0;
714 bp->b_dirtyend = 0;
715 }
716
717 /*
718 	 * If the new write leaves a contiguous dirty area, just
719 	 * update b_dirtyoff and b_dirtyend; otherwise extend the
720 	 * valid region so the dirty region can remain contiguous.
721 */
722 if (bp->b_dirtyend > 0 &&
723 (off > bp->b_dirtyend || (off + len) < bp->b_dirtyoff)) {
724 off_t start, end;
725
726 boff = (off_t)bp->b_blkno * DEV_BSIZE;
727 if (off > bp->b_dirtyend) {
728 start = boff + bp->b_validend;
729 end = boff + off;
730 } else {
731 start = boff + off + len;
732 end = boff + bp->b_validoff;
733 }
734
735 /*
736 * It may be that the valid region in the buffer
737 * covers the region we want, in which case just
738 * extend the dirty region. Otherwise we try to
739 * extend the valid region.
740 */
741 if (end > start) {
742 uio.uio_iov = &iov;
743 uio.uio_iovcnt = 1;
744 uio.uio_offset = start;
745 uio.uio_resid = end - start;
746 uio.uio_segflg = UIO_SYSSPACE;
747 uio.uio_rw = UIO_READ;
748 uio.uio_procp = p;
749 iov.iov_base = bp->b_data + (start - boff);
750 iov.iov_len = end - start;
751 error = nfs_readrpc(vp, &uio, cred);
752 if (error) {
753 /*
754 			 * If we couldn't read, do not do a VOP_BWRITE as
755 			 * originally coded: that could also fail, and looping
756 			 * back to "again" as before could leave us stuck
757 			 * trying to write the same buffer over and over.
758 			 * nfs_write will get the entire region if nfs_readrpc
759 			 * succeeds; if it fails we should just error out,
760 			 * since errors like ESTALE would keep us in this loop,
761 			 * unlike transient errors that would justify a retry.
762 			 * We can return from here instead of altering the
763 			 * dirty region later in the routine. Note that the old
764 			 * dirty region has not been written out at this point.
765 */
766 bp->b_error = error;
767 SET(bp->b_flags, B_ERROR);
768 				printf("nfs_getwriteblk: readrpc (2) returned %d\n", error);
769 return bp;
770 } else {
771 /*
772 * The read worked.
773 */
774 if (uio.uio_resid > 0) {
775 /*
776 * If there was a short read,
777 * just zero fill.
778 */
779 bzero(iov.iov_base,
780 uio.uio_resid);
781 }
782 if (off > bp->b_dirtyend)
783 bp->b_validend = off;
784 else
785 bp->b_validoff = off + len;
786 }
787 }
788
789 /*
790 * We now have a valid region which extends up to the
791 * dirty region which we want.
792 */
793 if (off > bp->b_dirtyend)
794 bp->b_dirtyend = off;
795 else
796 bp->b_dirtyoff = off + len;
797 }
798
799 return bp;
800 }
801
802 /*
803 * Get an nfs cache block.
804 * Allocate a new one if the block isn't currently in the cache
805 * and return the block marked busy. If the calling process is
806 * interrupted by a signal for an interruptible mount point, return
807 * NULL.
808 */
809 static struct buf *
810 nfs_getcacheblk(vp, bn, size, p, operation)
811 struct vnode *vp;
812 daddr_t bn;
813 int size;
814 struct proc *p;
815 int operation; /* defined in sys/buf.h */
816 {
817 register struct buf *bp;
818 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
819 	/* due to getblk/VM interactions, use values no larger than the VM page size */
820 int biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
821
822 if (nmp->nm_flag & NFSMNT_INT) {
823 bp = getblk(vp, bn, size, PCATCH, 0, operation);
824 while (bp == (struct buf *)0) {
825 if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
826 return ((struct buf *)0);
827 bp = getblk(vp, bn, size, 0, 2 * hz, operation);
828 }
829 } else
830 bp = getblk(vp, bn, size, 0, 0, operation);
831
832 	if (vp->v_type == VREG)
833 bp->b_blkno = (bn * biosize) / DEV_BSIZE;
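	/*
	 * E.g. (illustrative): logical block 2 with biosize 4096 maps to
	 * b_blkno 16, since b_blkno is expressed in DEV_BSIZE (512-byte)
	 * units: 2 * 4096 / 512 = 16.
	 */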
834
835 return (bp);
836 }
837
838 /*
839 * Flush and invalidate all dirty buffers. If another process is already
840 * doing the flush, just wait for completion.
841 */
842 int
843 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
844 struct vnode *vp;
845 int flags;
846 struct ucred *cred;
847 struct proc *p;
848 int intrflg;
849 {
850 register struct nfsnode *np = VTONFS(vp);
851 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
852 int error = 0, slpflag, slptimeo;
853 int didhold = 0;
854
855 if ((nmp->nm_flag & NFSMNT_INT) == 0)
856 intrflg = 0;
857 if (intrflg) {
858 slpflag = PCATCH;
859 slptimeo = 2 * hz;
860 } else {
861 slpflag = 0;
862 slptimeo = 0;
863 }
864 /*
865 * First wait for any other process doing a flush to complete.
866 */
867 while (np->n_flag & NFLUSHINPROG) {
868 np->n_flag |= NFLUSHWANT;
869 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
870 slptimeo);
871 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
872 return (EINTR);
873 }
874
875 /*
876 * Now, flush as required.
877 */
878 np->n_flag |= NFLUSHINPROG;
879 error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
880 while (error) {
881 		/* We seem to get stuck in a loop here if the thread is aborted:
882 		 * nfs_flush will return EINTR. It is not clear whether that has
883 		 * other consequences, since EINTR has other meanings in NFS.
884 		 * With no dirty pages it seems safe to just return from here;
885 		 * but if we did have dirty pages, how would we get them written
886 		 * out if the thread was aborted? Some other strategy is
887 		 * necessary. -- EKN
888 */
889 if ((intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) ||
890 ((error == EINTR) && current_thread_aborted())) {
891 np->n_flag &= ~NFLUSHINPROG;
892 if (np->n_flag & NFLUSHWANT) {
893 np->n_flag &= ~NFLUSHWANT;
894 wakeup((caddr_t)&np->n_flag);
895 }
896 return (EINTR);
897 }
898 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
899 }
900 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
901 if (np->n_flag & NFLUSHWANT) {
902 np->n_flag &= ~NFLUSHWANT;
903 wakeup((caddr_t)&np->n_flag);
904 }
905 didhold = ubc_hold(vp);
906 if (didhold) {
907 (void) ubc_clean(vp, 1); /* get the pages out of vm also */
908 ubc_rele(vp);
909 }
910 return (0);
911 }
912
913 /*
914 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
915 * This is mainly to avoid queueing async I/O requests when the nfsiods
916 * are all hung on a dead server.
917 */
918 int
919 nfs_asyncio(bp, cred)
920 register struct buf *bp;
921 struct ucred *cred;
922 {
923 struct nfsmount *nmp;
924 int i;
925 int gotiod;
926 int slpflag = 0;
927 int slptimeo = 0;
928 int error;
929
930 if (nfs_numasync == 0)
931 return (EIO);
932
933 nmp = VFSTONFS(bp->b_vp->v_mount);
934 again:
935 if (nmp->nm_flag & NFSMNT_INT)
936 slpflag = PCATCH;
937 gotiod = FALSE;
938
939 /*
940 * Find a free iod to process this request.
941 */
942 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
943 if (nfs_iodwant[i]) {
944 /*
945 * Found one, so wake it up and tell it which
946 * mount to process.
947 */
948 NFS_DPF(ASYNCIO,
949 ("nfs_asyncio: waking iod %d for mount %p\n",
950 i, nmp));
951 nfs_iodwant[i] = (struct proc *)0;
952 nfs_iodmount[i] = nmp;
953 nmp->nm_bufqiods++;
954 wakeup((caddr_t)&nfs_iodwant[i]);
955 gotiod = TRUE;
956 break;
957 }
958
959 /*
960 * If none are free, we may already have an iod working on this mount
961 * point. If so, it will process our request.
962 */
963 if (!gotiod) {
964 if (nmp->nm_bufqiods > 0) {
965 NFS_DPF(ASYNCIO,
966 ("nfs_asyncio: %d iods are already processing mount %p\n",
967 nmp->nm_bufqiods, nmp));
968 gotiod = TRUE;
969 }
970 }
971
972 /*
973 * If we have an iod which can process the request, then queue
974 * the buffer.
975 */
976 if (gotiod) {
977 /*
978 * Ensure that the queue never grows too large.
979 */
980 while (nmp->nm_bufqlen >= 2*nfs_numasync) {
981 NFS_DPF(ASYNCIO,
982 ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
983 nmp->nm_bufqwant = TRUE;
984 error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
985 "nfsaio", slptimeo);
986 if (error) {
987 if (nfs_sigintr(nmp, NULL, bp->b_proc))
988 return (EINTR);
989 if (slpflag == PCATCH) {
990 slpflag = 0;
991 slptimeo = 2 * hz;
992 }
993 }
994 /*
995 * We might have lost our iod while sleeping,
996 		 * so check and loop if necessary.
997 */
998 if (nmp->nm_bufqiods == 0) {
999 NFS_DPF(ASYNCIO,
1000 ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
1001 goto again;
1002 }
1003 }
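		/*
		 * E.g. (illustrative): with nfs_numasync of 4, the per-mount
		 * queue is capped at 8 buffers; producers sleep on nm_bufq
		 * until an iod drains the queue below that bound.
		 */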
1004
1005 if (ISSET(bp->b_flags, B_READ)) {
1006 if (bp->b_rcred == NOCRED && cred != NOCRED) {
1007 /*
1008 * NFS has embedded ucred.
1009 			 * Cannot crhold() here, as that causes zone corruption.
1010 */
1011 bp->b_rcred = crdup(cred);
1012 }
1013 } else {
1014 SET(bp->b_flags, B_WRITEINPROG);
1015 if (bp->b_wcred == NOCRED && cred != NOCRED) {
1016 /*
1017 * NFS has embedded ucred.
1018 			 * Cannot crhold() here, as that causes zone corruption.
1019 */
1020 bp->b_wcred = crdup(cred);
1021 }
1022 }
1023
1024 TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
1025 nmp->nm_bufqlen++;
1026 return (0);
1027 }
1028
1029 /*
1030 * All the iods are busy on other mounts, so return EIO to
1031 * force the caller to process the i/o synchronously.
1032 */
1033 NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
1034 return (EIO);
1035 }
1036
1037 /*
1038 * Do an I/O operation to/from a cache block. This may be called
1039 * synchronously or from an nfsiod.
1040 */
1041 int
1042 nfs_doio(bp, cr, p)
1043 register struct buf *bp;
1044 struct ucred *cr;
1045 struct proc *p;
1046 {
1047 register struct uio *uiop;
1048 register struct vnode *vp;
1049 struct nfsnode *np;
1050 struct nfsmount *nmp;
1051 int error = 0, diff, len, iomode, must_commit = 0;
1052 struct uio uio;
1053 struct iovec io;
1054
1055 vp = bp->b_vp;
1056 NFSTRACE(NFSTRC_DIO, vp);
1057 np = VTONFS(vp);
1058 nmp = VFSTONFS(vp->v_mount);
1059 uiop = &uio;
1060 uiop->uio_iov = &io;
1061 uiop->uio_iovcnt = 1;
1062 uiop->uio_segflg = UIO_SYSSPACE;
1063 uiop->uio_procp = p;
1064
1065 /*
1066 * With UBC, getblk() can return a buf with B_DONE set.
1067 * This indicates that the VM has valid data for that page.
1068 * NFS being stateless, this case poses a problem.
1069 * By definition, the NFS server should always be consulted
1070 * for the data in that page.
1071 	 * So we choose to clear B_DONE and do the I/O.
1072 *
1073 * XXX revisit this if there is a performance issue.
1074 * XXX In that case, we could play the attribute cache games ...
1075 */
1076 if (ISSET(bp->b_flags, B_DONE)) {
1077 if (!ISSET(bp->b_flags, B_ASYNC))
1078 panic("nfs_doio: done and not async");
1079 CLR(bp->b_flags, B_DONE);
1080 }
1081
1082 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_START,
1083 (int)np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount, bp->b_flags, 0);
1084
1085 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 257)) | DBG_FUNC_NONE,
1086 bp->b_validoff, bp->b_validend, bp->b_dirtyoff, bp->b_dirtyend, 0);
1087
1088 /*
1089 * Historically, paging was done with physio, but no more.
1090 */
1091 if (ISSET(bp->b_flags, B_PHYS)) {
1092 /*
1093 * ...though reading /dev/drum still gets us here.
1094 */
1095 io.iov_len = uiop->uio_resid = bp->b_bcount;
1096 /* mapping was done by vmapbuf() */
1097 io.iov_base = bp->b_data;
1098 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1099 if (ISSET(bp->b_flags, B_READ)) {
1100 uiop->uio_rw = UIO_READ;
1101 nfsstats.read_physios++;
1102 error = nfs_readrpc(vp, uiop, cr);
1103 } else {
1104 int com;
1105
1106 iomode = NFSV3WRITE_DATASYNC;
1107 uiop->uio_rw = UIO_WRITE;
1108 nfsstats.write_physios++;
1109 error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
1110 }
1111 if (error) {
1112 SET(bp->b_flags, B_ERROR);
1113 bp->b_error = error;
1114 }
1115 } else if (ISSET(bp->b_flags, B_READ)) {
1116 io.iov_len = uiop->uio_resid = bp->b_bcount;
1117 io.iov_base = bp->b_data;
1118 uiop->uio_rw = UIO_READ;
1119 switch (vp->v_type) {
1120 case VREG:
1121 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1122 nfsstats.read_bios++;
1123 error = nfs_readrpc(vp, uiop, cr);
1124
1125 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 262)) | DBG_FUNC_NONE,
1126 (int)np->n_size, bp->b_blkno * DEV_BSIZE, uiop->uio_resid, error, 0);
1127
1128
1129 if (!error) {
1130 bp->b_validoff = 0;
1131 if (uiop->uio_resid) {
1132 /*
1133 * If len > 0, there is a hole in the file and
1134 * no writes after the hole have been pushed to
1135 * the server yet.
1136 * Just zero fill the rest of the valid area.
1137 */
1138 diff = bp->b_bcount - uiop->uio_resid;
1139 len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
1140 + diff);
1141 if (len > 0) {
1142 len = min(len, uiop->uio_resid);
1143 bzero((char *)bp->b_data + diff, len);
1144 bp->b_validend = diff + len;
1145
1146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
1147 diff, len, 0, 1, 0);
1148
1149 } else
1150 bp->b_validend = diff;
1151 } else
1152 bp->b_validend = bp->b_bcount;
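		/*
		 * Short-read example for the code above (illustrative values):
		 * a 4096-byte block at byte offset 8192 where the server
		 * returns only 2048 bytes while n_size is 12000: diff = 2048,
		 * len = 12000 - 10240 = 1760 > 0, so the 1760 bytes past the
		 * short read are zero filled and b_validend becomes 3808.
		 */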
1153 #if 1 /* USV + JOE [ */
1154 if (bp->b_validend < bp->b_bufsize) {
1155 /*
1156 			 * We're about to release a partial buffer after a read; the only
1157 			 * way we should get here is if this buffer contains the EOF.
1158 			 * Before releasing it, we'll zero out to the end of the buffer
1159 			 * so that if an mmap of this page occurs, we'll see zeroes even
1160 			 * if an ftruncate extends the file in the meantime.
1161 */
1162 bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
1163
1164 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
1165 bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 2, 0);
1166 }
1167 #endif /* ] USV + JOE */
1168 }
1169 if (p && (vp->v_flag & VTEXT) &&
1170 (((nmp->nm_flag & NFSMNT_NQNFS) &&
1171 NQNFS_CKINVALID(vp, np, ND_READ) &&
1172 np->n_lrev != np->n_brev) ||
1173 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
1174 np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
1175 uprintf("Process killed due to text file modification\n");
1176 psignal(p, SIGKILL);
1177 p->p_flag |= P_NOSWAP;
1178 }
1179 break;
1180 case VLNK:
1181 uiop->uio_offset = (off_t)0;
1182 nfsstats.readlink_bios++;
1183 error = nfs_readlinkrpc(vp, uiop, cr);
1184 break;
1185 case VDIR:
1186 nfsstats.readdir_bios++;
1187 uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
1188 if (!(nmp->nm_flag & NFSMNT_NFSV3))
1189 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; /* dk@farm.org */
1190 if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
1191 error = nfs_readdirplusrpc(vp, uiop, cr);
1192 if (error == NFSERR_NOTSUPP)
1193 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
1194 }
1195 if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
1196 error = nfs_readdirrpc(vp, uiop, cr);
1197 break;
1198 default:
1199 	    printf("nfs_doio: type %x unexpected\n", vp->v_type);
1200 	    break;
1201 	}
1202 if (error) {
1203 SET(bp->b_flags, B_ERROR);
1204 bp->b_error = error;
1205 }
1206 } else {
1207 if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
1208 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
1209
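	/*
	 * E.g. (illustrative values): a block at b_blkno 16 (byte offset
	 * 8192) with b_dirtyend 4096 but n_size 10000 has its b_dirtyend
	 * clamped to 10000 - 8192 = 1808, so nothing past EOF is written.
	 */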
1210 if (bp->b_dirtyend > bp->b_dirtyoff) {
1211
1212 io.iov_len = uiop->uio_resid = bp->b_dirtyend
1213 - bp->b_dirtyoff;
1214 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
1215 + bp->b_dirtyoff;
1216 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1217 uiop->uio_rw = UIO_WRITE;
1218
1219 nfsstats.write_bios++;
1220 if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
1221 iomode = NFSV3WRITE_UNSTABLE;
1222 else
1223 iomode = NFSV3WRITE_FILESYNC;
1224 SET(bp->b_flags, B_WRITEINPROG);
1225 error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
1226 if (!error && iomode == NFSV3WRITE_UNSTABLE)
1227 SET(bp->b_flags, B_NEEDCOMMIT);
1228 else
1229 CLR(bp->b_flags, B_NEEDCOMMIT);
1230 CLR(bp->b_flags, B_WRITEINPROG);
1231
1232 /*
1233 * For an interrupted write, the buffer is still valid
1234 * and the write hasn't been pushed to the server yet,
1235 * so we can't set B_ERROR and report the interruption
1236 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
1237 * is not relevant, so the rpc attempt is essentially
1238 * a noop. For the case of a V3 write rpc not being
1239 * committed to stable storage, the block is still
1240 * dirty and requires either a commit rpc or another
1241 * write rpc with iomode == NFSV3WRITE_FILESYNC before
1242 * the block is reused. This is indicated by setting
1243 * the B_DELWRI and B_NEEDCOMMIT flags.
1244 */
1245 if (error == EINTR
1246 || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
1247 int s;
1248
1249 CLR(bp->b_flags, (B_INVAL|B_NOCACHE));
1250 SET(bp->b_flags, B_DELWRI);
1251
1252 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 261)) | DBG_FUNC_NONE,
1253 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1254
1255 /*
1256 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
1257 * buffer to the clean list, we have to reassign it back to the
1258 * dirty one. Ugh.
1259 */
1260 if (ISSET(bp->b_flags, B_ASYNC)) {
1261 s = splbio();
1262 reassignbuf(bp, vp);
1263 splx(s);
1264 } else {
1265 SET(bp->b_flags, B_EINTR);
1266 }
1267 } else {
1268 if (error) {
1269 SET(bp->b_flags, B_ERROR);
1270 bp->b_error = np->n_error = error;
1271 np->n_flag |= NWRITEERR;
1272 }
1273 bp->b_dirtyoff = bp->b_dirtyend = 0;
1274
1275 #if 1 /* JOE */
1276 /*
1277 	 * validoff and validend represent the real data present in this buffer.
1278 	 * If validoff is non-zero, then we have to invalidate the buffer and kill
1279 	 * the page when biodone is called... the same is also true when validend
1280 	 * doesn't extend all the way to the end of the buffer and validend doesn't
1281 	 * equate to the current EOF... Eventually we need to deal with this in a
1282 	 * more humane way (like keeping the partial buffer without making it
1283 	 * immediately available to the VM page cache).
1284 */
1285 if (bp->b_validoff)
1286 SET(bp->b_flags, B_INVAL);
1287 else
1288 if (bp->b_validend < bp->b_bufsize) {
1289 if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) == np->n_size) {
1290 bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
1291
1292 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 259)) | DBG_FUNC_NONE,
1293 				      bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 0, 0);
1294 }
1295 else
1296 SET(bp->b_flags, B_INVAL);
1297 }
1298 #endif
1299 }
1300
1301 } else {
1302
1303 #if 1 /* JOE */
1304 if (bp->b_validoff)
1305 SET(bp->b_flags, B_INVAL);
1306 else if (bp->b_validend < bp->b_bufsize) {
1307 if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) != np->n_size)
1308 SET(bp->b_flags, B_INVAL);
1309 }
1310 if (bp->b_flags & B_INVAL) {
1311 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
1312 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1313 }
1314 #endif
1315 bp->b_resid = 0;
1316 biodone(bp);
1317 NFSTRACE(NFSTRC_DIO_DONE, vp);
1318 return (0);
1319 }
1320 }
1321 bp->b_resid = uiop->uio_resid;
1322 if (must_commit)
1323 nfs_clearcommit(vp->v_mount);
1324
1325 if (bp->b_flags & B_INVAL) {
1326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
1327 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1328 }
1329 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_END,
1330 bp->b_validoff, bp->b_validend, bp->b_bcount, error, 0);
1331
1332 biodone(bp);
1333 NFSTRACE(NFSTRC_DIO_DONE, vp);
1334 return (error);
1335 }