2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
66 #include <sys/malloc.h>
67 #include <sys/vnode.h>
68 #include <sys/dirent.h>
69 #include <sys/mount.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
75 #include <sys/vmparam.h>
78 #include <kern/clock.h>
80 #include <nfs/rpcv2.h>
81 #include <nfs/nfsproto.h>
83 #include <nfs/nfsmount.h>
84 #include <nfs/nqnfs.h>
85 #include <nfs/nfsnode.h>
87 #include <sys/kdebug.h>
89 #define FSDBG(A, B, C, D, E) \
90 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
91 (int)(B), (int)(C), (int)(D), (int)(E), 0)
92 #define FSDBG_TOP(A, B, C, D, E) \
93 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
94 (int)(B), (int)(C), (int)(D), (int)(E), 0)
95 #define FSDBG_BOT(A, B, C, D, E) \
96 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
97 (int)(B), (int)(C), (int)(D), (int)(E), 0)
99 extern int nfs_numasync
;
100 extern int nfs_ioddelwri
;
101 extern struct nfsstats nfsstats
;
103 #define NFSBUFHASH(dvp, lbn) \
104 (&nfsbufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & nfsbufhash])
105 LIST_HEAD(nfsbufhashhead
, nfsbuf
) *nfsbufhashtbl
;
106 struct nfsbuffreehead nfsbuffree
, nfsbufdelwri
;
108 int nfsbufhashlock
, nfsbufcnt
, nfsbufmin
, nfsbufmax
;
109 int nfsbuffreecnt
, nfsbufdelwricnt
, nfsneedbuffer
;
112 #define NFSBUFWRITE_THROTTLE 9
115 * Initialize nfsbuf lists
121 nfsbufhashtbl
= hashinit(nbuf
, M_TEMP
, &nfsbufhash
);
122 TAILQ_INIT(&nfsbuffree
);
123 TAILQ_INIT(&nfsbufdelwri
);
124 nfsbufcnt
= nfsbuffreecnt
= nfsbufdelwricnt
= 0;
125 nfsbufmin
= 128; // XXX tune me!
126 nfsbufmax
= 8192; // XXX tune me!
132 * try to free up some excess, unused nfsbufs
140 #define NFS_BUF_FREEUP() \
142 /* only call nfs_buf_freeup() if it has work to do */ \
143 if ((nfsbuffreecnt > nfsbufcnt/4) && \
144 (nfsbufcnt-nfsbuffreecnt/8 > nfsbufmin)) \
148 if (nfsbuffreecnt
< nfsbufcnt
/4)
150 cnt
= nfsbuffreecnt
/8;
151 if (nfsbufcnt
-cnt
< nfsbufmin
)
154 FSDBG(320, -1, nfsbufcnt
, nfsbuffreecnt
, cnt
);
156 fbp
= TAILQ_FIRST(&nfsbuffree
);
159 nfs_buf_remfree(fbp
);
160 /* disassociate buffer from any vnode */
163 if (fbp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
164 LIST_REMOVE(fbp
, nb_vnbufs
);
165 fbp
->nb_vnbufs
.le_next
= NFSNOLIST
;
171 LIST_REMOVE(fbp
, nb_hash
);
173 if (fbp
->nb_rcred
!= NOCRED
)
174 crfree(fbp
->nb_rcred
);
175 if (fbp
->nb_wcred
!= NOCRED
)
176 crfree(fbp
->nb_wcred
);
177 /* if buf was NB_META, dump buffer */
178 if (ISSET(fbp
->nb_flags
, NB_META
) && fbp
->nb_data
) {
179 FREE(fbp
->nb_data
, M_TEMP
);
184 FSDBG(320, -1, nfsbufcnt
, nfsbuffreecnt
, cnt
);
188 nfs_buf_remfree(struct nfsbuf
*bp
)
190 if (bp
->nb_free
.tqe_next
== NFSNOLIST
)
191 panic("nfsbuf not on free list");
192 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
194 TAILQ_REMOVE(&nfsbufdelwri
, bp
, nb_free
);
197 TAILQ_REMOVE(&nfsbuffree
, bp
, nb_free
);
199 bp
->nb_free
.tqe_next
= NFSNOLIST
;
204 * check for existence of nfsbuf in cache
207 nfs_buf_incore(struct vnode
*vp
, daddr_t blkno
)
209 /* Search hash chain */
210 struct nfsbuf
* bp
= NFSBUFHASH(vp
, blkno
)->lh_first
;
211 for (; bp
!= NULL
; bp
= bp
->nb_hash
.le_next
)
212 if (bp
->nb_lblkno
== blkno
&& bp
->nb_vp
== vp
&&
213 !ISSET(bp
->nb_flags
, NB_INVAL
)) {
214 FSDBG(547, bp
, blkno
, bp
->nb_flags
, bp
->nb_vp
);
221 * Check if it's OK to drop a page.
223 * Called by vnode_pager() on pageout request of non-dirty page.
224 * We need to make sure that it's not part of a delayed write.
225 * If it is, we can't let the VM drop it because we may need it
226 * later when/if we need to write the data (again).
229 nfs_buf_page_inval(struct vnode
*vp
, off_t offset
)
232 bp
= nfs_buf_incore(vp
, ubc_offtoblk(vp
, offset
));
235 FSDBG(325, bp
, bp
->nb_flags
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
);
236 if (ISSET(bp
->nb_flags
, NB_BUSY
))
239 * If there's a dirty range in the buffer, check to
240 * see if this page intersects with the dirty range.
241 * If it does, we can't let the pager drop the page.
243 if (bp
->nb_dirtyend
> 0) {
244 int start
= offset
- NBOFF(bp
);
245 if (bp
->nb_dirtyend
<= start
||
246 bp
->nb_dirtyoff
>= (start
+ PAGE_SIZE
))
254 nfs_buf_upl_setup(struct nfsbuf
*bp
)
260 if (ISSET(bp
->nb_flags
, NB_PAGELIST
))
263 kret
= ubc_create_upl(bp
->nb_vp
, NBOFF(bp
), bp
->nb_bufsize
,
264 &upl
, NULL
, UPL_PRECIOUS
);
265 if (kret
== KERN_INVALID_ARGUMENT
) {
266 /* vm object probably doesn't exist any more */
267 bp
->nb_pagelist
= NULL
;
270 if (kret
!= KERN_SUCCESS
) {
271 printf("nfs_buf_upl_setup(): failed to get pagelist %d\n", kret
);
272 bp
->nb_pagelist
= NULL
;
276 FSDBG(538, bp
, NBOFF(bp
), bp
->nb_bufsize
, bp
->nb_vp
);
279 bp
->nb_pagelist
= upl
;
280 SET(bp
->nb_flags
, NB_PAGELIST
);
286 nfs_buf_upl_check(struct nfsbuf
*bp
)
289 off_t filesize
, fileoffset
;
292 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
))
295 npages
= round_page_32(bp
->nb_bufsize
) / PAGE_SIZE
;
296 filesize
= ubc_getsize(bp
->nb_vp
);
297 fileoffset
= NBOFF(bp
);
298 if (fileoffset
< filesize
)
299 SET(bp
->nb_flags
, NB_CACHE
);
301 CLR(bp
->nb_flags
, NB_CACHE
);
303 pl
= ubc_upl_pageinfo(bp
->nb_pagelist
);
304 bp
->nb_valid
= bp
->nb_dirty
= 0;
306 for (i
=0; i
< npages
; i
++, fileoffset
+= PAGE_SIZE_64
) {
307 /* anything beyond the end of the file is not valid or dirty */
308 if (fileoffset
>= filesize
)
310 if (!upl_valid_page(pl
, i
)) {
311 CLR(bp
->nb_flags
, NB_CACHE
);
315 if (upl_dirty_page(pl
, i
)) {
316 NBPGDIRTY_SET(bp
, i
);
317 if (!ISSET(bp
->nb_flags
, NB_WASDIRTY
))
318 SET(bp
->nb_flags
, NB_WASDIRTY
);
321 fileoffset
= NBOFF(bp
);
322 if (ISSET(bp
->nb_flags
, NB_CACHE
)) {
324 bp
->nb_validend
= bp
->nb_bufsize
;
325 if (fileoffset
+ bp
->nb_validend
> filesize
)
326 bp
->nb_validend
= filesize
- fileoffset
;
328 bp
->nb_validoff
= bp
->nb_validend
= -1;
330 FSDBG(539, bp
, fileoffset
, bp
->nb_valid
, bp
->nb_dirty
);
331 FSDBG(539, bp
->nb_validoff
, bp
->nb_validend
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
);
335 nfs_buf_map(struct nfsbuf
*bp
)
341 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
))
344 kret
= ubc_upl_map(bp
->nb_pagelist
, (vm_address_t
*)&(bp
->nb_data
));
345 if (kret
!= KERN_SUCCESS
)
346 panic("nfs_buf_map: ubc_upl_map() failed with (%d)", kret
);
347 if (bp
->nb_data
== 0)
348 panic("ubc_upl_map mapped 0");
349 FSDBG(540, bp
, bp
->nb_flags
, NBOFF(bp
), bp
->nb_data
);
354 * check range of pages in nfsbuf's UPL for validity
357 nfs_buf_upl_valid_range(struct nfsbuf
*bp
, int off
, int size
)
359 off_t fileoffset
, filesize
;
363 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
))
365 pl
= ubc_upl_pageinfo(bp
->nb_pagelist
);
367 size
+= off
& PAGE_MASK
;
369 fileoffset
= NBOFF(bp
);
370 filesize
= VTONFS(bp
->nb_vp
)->n_size
;
371 if ((fileoffset
+ off
+ size
) > filesize
)
372 size
= filesize
- (fileoffset
+ off
);
375 lastpg
= (off
+ size
- 1)/PAGE_SIZE
;
376 while (pg
<= lastpg
) {
377 if (!upl_valid_page(pl
, pg
))
385 * normalize an nfsbuf's valid range
387 * the read/write code guarantees that we'll always have a valid
388 * region that is an integral number of pages. If either end
389 * of the valid range isn't page-aligned, it gets corrected
390 * here as we extend the valid range through all of the
391 * contiguous valid pages.
394 nfs_buf_normalize_valid_range(struct nfsnode
*np
, struct nfsbuf
*bp
)
397 /* pull validoff back to start of contiguous valid page range */
398 pg
= bp
->nb_validoff
/PAGE_SIZE
;
399 while (pg
>= 0 && NBPGVALID(bp
,pg
))
401 bp
->nb_validoff
= (pg
+1) * PAGE_SIZE
;
402 /* push validend forward to end of contiguous valid page range */
403 npg
= bp
->nb_bufsize
/PAGE_SIZE
;
404 pg
= bp
->nb_validend
/PAGE_SIZE
;
405 while (pg
< npg
&& NBPGVALID(bp
,pg
))
407 bp
->nb_validend
= pg
* PAGE_SIZE
;
409 if (NBOFF(bp
) + bp
->nb_validend
> np
->n_size
)
410 bp
->nb_validend
= np
->n_size
% bp
->nb_bufsize
;
414 * try to push out some delayed/uncommitted writes
417 nfs_buf_delwri_push(void)
422 if (TAILQ_EMPTY(&nfsbufdelwri
))
425 /* first try to tell the nfsiods to do it */
426 if (nfs_asyncio(NULL
, NULL
) == 0)
429 /* otherwise, try to do some of the work ourselves */
431 while (i
< 8 && (bp
= TAILQ_FIRST(&nfsbufdelwri
)) != NULL
) {
432 struct nfsnode
*np
= VTONFS(bp
->nb_vp
);
434 if (ISSET(bp
->nb_flags
, NB_NEEDCOMMIT
)) {
435 /* put buffer at end of delwri list */
436 TAILQ_INSERT_TAIL(&nfsbufdelwri
, bp
, nb_free
);
438 nfs_flushcommits(np
->n_vnode
, (struct proc
*)0);
440 SET(bp
->nb_flags
, (NB_BUSY
| NB_ASYNC
));
448 * Get an nfs cache block.
449 * Allocate a new one if the block isn't currently in the cache
450 * and return the block marked busy. If the calling process is
451 * interrupted by a signal for an interruptible mount point, return
462 struct nfsnode
*np
= VTONFS(vp
);
464 int i
, biosize
, bufsize
, rv
;
466 int slpflag
= PCATCH
;
468 FSDBG_TOP(541, vp
, blkno
, size
, operation
);
471 if (bufsize
> MAXBSIZE
)
472 panic("nfs_buf_get: buffer larger than MAXBSIZE requested");
474 biosize
= vp
->v_mount
->mnt_stat
.f_iosize
;
476 if (UBCINVALID(vp
) || !UBCINFOEXISTS(vp
))
477 operation
= BLK_META
;
478 else if (bufsize
< biosize
)
479 /* reg files should always have biosize blocks */
482 /* if BLK_WRITE, check for too many delayed/uncommitted writes */
483 if ((operation
== BLK_WRITE
) && (nfs_nbdwrite
> ((nfsbufcnt
*3)/4))) {
484 FSDBG_TOP(542, vp
, blkno
, nfs_nbdwrite
, ((nfsbufcnt
*3)/4));
486 /* poke the delwri list */
487 nfs_buf_delwri_push();
489 /* sleep to let other threads run... */
490 tsleep(&nfs_nbdwrite
, PCATCH
, "nfs_nbdwrite", 1);
491 FSDBG_BOT(542, vp
, blkno
, nfs_nbdwrite
, ((nfsbufcnt
*3)/4));
496 * Obtain a lock to prevent a race condition if the
497 * MALLOC() below happens to block.
499 if (nfsbufhashlock
) {
500 while (nfsbufhashlock
) {
502 tsleep(&nfsbufhashlock
, PCATCH
, "nfsbufget", 0);
503 if (nfs_sigintr(VFSTONFS(vp
->v_mount
), NULL
, p
))
510 /* check for existence of nfsbuf in cache */
511 if (bp
= nfs_buf_incore(vp
, blkno
)) {
512 /* if busy, set wanted and wait */
513 if (ISSET(bp
->nb_flags
, NB_BUSY
)) {
514 FSDBG_TOP(543, vp
, blkno
, bp
, bp
->nb_flags
);
515 SET(bp
->nb_flags
, NB_WANTED
);
517 if (nfsbufhashlock
< 0) {
519 wakeup(&nfsbufhashlock
);
522 tsleep(bp
, slpflag
|(PRIBIO
+1), "nfsbufget", (slpflag
== PCATCH
) ? 0 : 2*hz
);
524 FSDBG_BOT(543, vp
, blkno
, bp
, bp
->nb_flags
);
525 if (nfs_sigintr(VFSTONFS(vp
->v_mount
), NULL
, p
)) {
526 FSDBG_BOT(541, vp
, blkno
, 0, EINTR
);
531 if (bp
->nb_bufsize
!= bufsize
)
532 panic("nfsbuf size mismatch");
533 SET(bp
->nb_flags
, (NB_BUSY
| NB_CACHE
));
535 /* additional paranoia: */
536 if (ISSET(bp
->nb_flags
, NB_PAGELIST
))
537 panic("pagelist buffer was not busy");
542 * where to get a free buffer:
543 * - alloc new if we haven't reached min bufs
545 * - alloc new if we haven't reached max allowed
546 * - start clearing out delwri list and try again
549 if ((nfsbufcnt
> nfsbufmin
) && !TAILQ_EMPTY(&nfsbuffree
)) {
550 /* pull an nfsbuf off the free list */
551 bp
= TAILQ_FIRST(&nfsbuffree
);
552 FSDBG(544, vp
, blkno
, bp
, bp
->nb_flags
);
554 if (ISSET(bp
->nb_flags
, NB_DELWRI
))
555 panic("nfs_buf_get: delwri");
556 SET(bp
->nb_flags
, NB_BUSY
);
557 /* disassociate buffer from previous vnode */
560 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
561 LIST_REMOVE(bp
, nb_vnbufs
);
562 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
568 LIST_REMOVE(bp
, nb_hash
);
569 /* nuke any creds we're holding */
571 if (cred
!= NOCRED
) {
572 bp
->nb_rcred
= NOCRED
;
576 if (cred
!= NOCRED
) {
577 bp
->nb_wcred
= NOCRED
;
580 /* if buf will no longer be NB_META, dump old buffer */
581 if ((operation
!= BLK_META
) &&
582 ISSET(bp
->nb_flags
, NB_META
) && bp
->nb_data
) {
583 FREE(bp
->nb_data
, M_TEMP
);
586 /* re-init buf fields */
588 bp
->nb_validoff
= bp
->nb_validend
= -1;
589 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
592 } else if (nfsbufcnt
< nfsbufmax
) {
593 /* just alloc a new one */
594 MALLOC(bp
, struct nfsbuf
*, sizeof(struct nfsbuf
), M_TEMP
, M_WAITOK
);
598 bzero(bp
, sizeof(*bp
));
599 bp
->nb_free
.tqe_next
= NFSNOLIST
;
600 bp
->nb_validoff
= bp
->nb_validend
= -1;
601 FSDBG(545, vp
, blkno
, bp
, 0);
603 /* too many bufs... wait for buffers to free up */
604 FSDBG_TOP(546, vp
, blkno
, nfsbufcnt
, nfsbufmax
);
606 if (nfsbufhashlock
< 0) {
608 wakeup(&nfsbufhashlock
);
612 /* poke the delwri list */
613 nfs_buf_delwri_push();
616 tsleep(&nfsneedbuffer
, PCATCH
, "nfsbufget", 0);
617 FSDBG_BOT(546, vp
, blkno
, nfsbufcnt
, nfsbufmax
);
618 if (nfs_sigintr(VFSTONFS(vp
->v_mount
), NULL
, p
)) {
619 FSDBG_BOT(541, vp
, blkno
, 0, EINTR
);
628 bp
->nb_flags
= NB_BUSY
;
629 bp
->nb_lblkno
= blkno
;
630 /* insert buf in hash */
631 LIST_INSERT_HEAD(NFSBUFHASH(vp
, blkno
), bp
, nb_hash
);
632 /* associate buffer with new vnode */
635 LIST_INSERT_HEAD(&np
->n_cleanblkhd
, bp
, nb_vnbufs
);
641 SET(bp
->nb_flags
, NB_META
);
642 if ((bp
->nb_bufsize
!= bufsize
) && bp
->nb_data
) {
643 FREE(bp
->nb_data
, M_TEMP
);
645 bp
->nb_validoff
= bp
->nb_validend
= -1;
646 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
649 CLR(bp
->nb_flags
, NB_CACHE
);
652 MALLOC(bp
->nb_data
, caddr_t
, bufsize
, M_TEMP
, M_WAITOK
);
654 panic("nfs_buf_get: null nb_data");
655 bp
->nb_bufsize
= bufsize
;
660 if (bufsize
< PAGE_SIZE
)
662 bp
->nb_bufsize
= bufsize
;
663 bp
->nb_validoff
= bp
->nb_validend
= -1;
665 if (UBCISVALID(vp
)) {
667 if (nfs_buf_upl_setup(bp
)) {
668 /* unable to create upl */
669 /* vm object must no longer exist */
670 /* cleanup buffer and return NULL */
671 LIST_REMOVE(bp
, nb_vnbufs
);
672 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
675 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
)
676 panic("nfsbuf on freelist");
677 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
679 FSDBG_BOT(541, vp
, blkno
, 0x2bc, EIO
);
682 nfs_buf_upl_check(bp
);
687 panic("nfs_buf_get: %d unknown operation", operation
);
691 if (nfsbufhashlock
< 0) {
693 wakeup(&nfsbufhashlock
);
697 FSDBG_BOT(541, vp
, blkno
, bp
, bp
->nb_flags
);
703 nfs_buf_release(struct nfsbuf
*bp
)
705 struct vnode
*vp
= bp
->nb_vp
;
707 FSDBG_TOP(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
708 FSDBG(548, bp
->nb_validoff
, bp
->nb_validend
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
);
709 FSDBG(548, bp
->nb_valid
, 0, bp
->nb_dirty
, 0);
711 if (UBCINFOEXISTS(vp
) && bp
->nb_bufsize
) {
716 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
) && !ISSET(bp
->nb_flags
, NB_INVAL
)) {
717 rv
= nfs_buf_upl_setup(bp
);
719 printf("nfs_buf_release: upl create failed %d\n", rv
);
721 nfs_buf_upl_check(bp
);
723 upl
= bp
->nb_pagelist
;
725 goto pagelist_cleanup_done
;
727 if (ubc_upl_unmap(upl
) != KERN_SUCCESS
)
728 panic("ubc_upl_unmap failed");
731 if (bp
->nb_flags
& (NB_ERROR
| NB_INVAL
)) {
732 if (bp
->nb_flags
& (NB_READ
| NB_INVAL
))
733 upl_flags
= UPL_ABORT_DUMP_PAGES
;
736 ubc_upl_abort(upl
, upl_flags
);
737 goto pagelist_cleanup_done
;
739 for (i
=0; i
<= (bp
->nb_bufsize
- 1)/PAGE_SIZE
; i
++) {
740 if (!NBPGVALID(bp
,i
))
741 ubc_upl_abort_range(upl
,
742 i
*PAGE_SIZE
, PAGE_SIZE
,
743 UPL_ABORT_DUMP_PAGES
|
744 UPL_ABORT_FREE_ON_EMPTY
);
747 upl_flags
= UPL_COMMIT_SET_DIRTY
;
749 upl_flags
= UPL_COMMIT_CLEAR_DIRTY
;
750 ubc_upl_commit_range(upl
,
751 i
*PAGE_SIZE
, PAGE_SIZE
,
753 UPL_COMMIT_INACTIVATE
|
754 UPL_COMMIT_FREE_ON_EMPTY
);
757 pagelist_cleanup_done
:
758 /* was this the last buffer in the file? */
759 if (NBOFF(bp
) + bp
->nb_bufsize
> VTONFS(vp
)->n_size
) {
760 /* if so, invalidate all pages of last buffer past EOF */
761 int biosize
= vp
->v_mount
->mnt_stat
.f_iosize
;
763 off
= trunc_page_64(VTONFS(vp
)->n_size
) + PAGE_SIZE_64
;
764 size
= trunc_page_64(NBOFF(bp
) + biosize
) - off
;
766 ubc_invalidate(vp
, off
, size
);
768 CLR(bp
->nb_flags
, NB_PAGELIST
);
769 bp
->nb_pagelist
= NULL
;
772 /* Wake up any processes waiting for any buffer to become free. */
775 wakeup(&nfsneedbuffer
);
777 /* Wake up any processes waiting for _this_ buffer to become free. */
778 if (ISSET(bp
->nb_flags
, NB_WANTED
)) {
779 CLR(bp
->nb_flags
, NB_WANTED
);
783 /* If it's not cacheable, or an error, mark it invalid. */
784 if (ISSET(bp
->nb_flags
, (NB_NOCACHE
|NB_ERROR
)))
785 SET(bp
->nb_flags
, NB_INVAL
);
787 if ((bp
->nb_bufsize
<= 0) || ISSET(bp
->nb_flags
, NB_INVAL
)) {
788 /* If it's invalid or empty, dissociate it from its vnode */
789 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
790 LIST_REMOVE(bp
, nb_vnbufs
);
791 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
795 /* if this was a delayed write, wakeup anyone */
796 /* waiting for delayed writes to complete */
797 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
798 CLR(bp
->nb_flags
, NB_DELWRI
);
801 wakeup((caddr_t
)&nfs_nbdwrite
);
803 /* put buffer at head of free list */
804 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
)
805 panic("nfsbuf on freelist");
806 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
809 } else if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
810 /* put buffer at end of delwri list */
811 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
)
812 panic("nfsbuf on freelist");
813 TAILQ_INSERT_TAIL(&nfsbufdelwri
, bp
, nb_free
);
816 /* put buffer at end of free list */
817 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
)
818 panic("nfsbuf on freelist");
819 TAILQ_INSERT_TAIL(&nfsbuffree
, bp
, nb_free
);
826 /* Unlock the buffer. */
827 CLR(bp
->nb_flags
, (NB_ASYNC
| NB_BUSY
| NB_NOCACHE
| NB_STABLE
| NB_IOD
));
829 FSDBG_BOT(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
833 * Wait for operations on the buffer to complete.
834 * When they do, extract and return the I/O's error value.
837 nfs_buf_iowait(struct nfsbuf
*bp
)
839 FSDBG_TOP(549, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
841 while (!ISSET(bp
->nb_flags
, NB_DONE
))
842 tsleep(bp
, PRIBIO
+ 1, "nfs_buf_iowait", 0);
844 FSDBG_BOT(549, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
846 /* check for interruption of I/O, then errors. */
847 if (ISSET(bp
->nb_flags
, NB_EINTR
)) {
848 CLR(bp
->nb_flags
, NB_EINTR
);
850 } else if (ISSET(bp
->nb_flags
, NB_ERROR
))
851 return (bp
->nb_error
? bp
->nb_error
: EIO
);
856 * Mark I/O complete on a buffer.
859 nfs_buf_iodone(struct nfsbuf
*bp
)
863 FSDBG_TOP(550, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
865 if (ISSET(bp
->nb_flags
, NB_DONE
))
866 panic("nfs_buf_iodone already");
867 SET(bp
->nb_flags
, NB_DONE
); /* note that it's done */
869 * I/O was done, so don't believe
870 * the DIRTY state from VM anymore
872 CLR(bp
->nb_flags
, NB_WASDIRTY
);
874 if (!ISSET(bp
->nb_flags
, NB_READ
)) {
875 CLR(bp
->nb_flags
, NB_WRITEINPROG
);
879 /* Wakeup the throttled write operations as needed */
881 if (vp
&& (vp
->v_flag
& VTHROTTLED
)
882 && (vp
->v_numoutput
<= (NFSBUFWRITE_THROTTLE
/ 3))) {
883 vp
->v_flag
&= ~VTHROTTLED
;
884 wakeup((caddr_t
)&vp
->v_numoutput
);
887 if (ISSET(bp
->nb_flags
, NB_ASYNC
)) /* if async, release it */
889 else { /* or just wakeup the buffer */
890 CLR(bp
->nb_flags
, NB_WANTED
);
894 FSDBG_BOT(550, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
898 nfs_buf_write_delayed(struct nfsbuf
*bp
)
900 struct proc
*p
= current_proc();
901 struct vnode
*vp
= bp
->nb_vp
;
903 FSDBG_TOP(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
904 FSDBG(551, bp
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
, bp
->nb_dirty
);
907 * If the block hasn't been seen before:
908 * (1) Mark it as having been seen,
909 * (2) Charge for the write.
910 * (3) Make sure it's on its vnode's correct block list,
912 if (!ISSET(bp
->nb_flags
, NB_DELWRI
)) {
913 SET(bp
->nb_flags
, NB_DELWRI
);
915 p
->p_stats
->p_ru
.ru_oublock
++; /* XXX */
918 /* move to dirty list */
919 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
)
920 LIST_REMOVE(bp
, nb_vnbufs
);
921 LIST_INSERT_HEAD(&VTONFS(vp
)->n_dirtyblkhd
, bp
, nb_vnbufs
);
925 * If the vnode has "too many" write operations in progress
926 * wait for them to finish the IO
928 while (vp
->v_numoutput
>= NFSBUFWRITE_THROTTLE
) {
929 vp
->v_flag
|= VTHROTTLED
;
930 tsleep((caddr_t
)&vp
->v_numoutput
, PRIBIO
+ 1, "nfs_buf_write_delayed", 0);
934 * If we have too many delayed write buffers,
935 * more than we can "safely" handle, just fall back to
936 * doing the async write
938 if (nfs_nbdwrite
< 0)
939 panic("nfs_buf_write_delayed: Negative nfs_nbdwrite");
941 if (nfs_nbdwrite
> ((nfsbufcnt
/4)*3)) {
942 /* issue async write */
943 SET(bp
->nb_flags
, NB_ASYNC
);
945 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
949 /* Otherwise, the "write" is done, so mark and release the buffer. */
950 SET(bp
->nb_flags
, NB_DONE
);
952 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
958 * Vnode op for read using bio
959 * Any similarity to readip() is purely coincidental
962 nfs_bioread(vp
, uio
, ioflag
, cred
, getpages
)
963 register struct vnode
*vp
;
964 register struct uio
*uio
;
967 int getpages
; // XXX unused!
969 struct nfsnode
*np
= VTONFS(vp
);
972 struct nfsbuf
*bp
= 0, *rabp
;
975 struct nfsmount
*nmp
= VFSTONFS(vp
->v_mount
);
976 daddr_t lbn
, rabn
, lastrabn
= -1;
978 int nra
, error
= 0, n
= 0, on
= 0;
979 int operation
= (getpages
? BLK_PAGEIN
: BLK_READ
);
981 struct dirent
*direntp
;
983 FSDBG_TOP(514, vp
, uio
->uio_offset
, uio
->uio_resid
, ioflag
);
986 if (uio
->uio_rw
!= UIO_READ
)
987 panic("nfs_read mode");
989 if (uio
->uio_resid
== 0) {
990 FSDBG_BOT(514, vp
, 0xd1e0001, 0, 0);
993 if (uio
->uio_offset
< 0) {
994 FSDBG_BOT(514, vp
, 0xd1e0002, 0, EINVAL
);
998 if ((nmp
->nm_flag
& NFSMNT_NFSV3
) &&
999 !(nmp
->nm_state
& NFSSTA_GOTFSINFO
))
1000 (void)nfs_fsinfo(nmp
, vp
, cred
, p
);
1001 biosize
= vp
->v_mount
->mnt_stat
.f_iosize
;
1003 * For nfs, cache consistency can only be maintained approximately.
1004 * Although RFC1094 does not specify the criteria, the following is
1005 * believed to be compatible with the reference port.
1006 * For nqnfs, full cache consistency is maintained within the loop.
1008 * If the file's modify time on the server has changed since the
1009 * last read rpc or you have written to the file,
1010 * you may have lost data cache consistency with the
1011 * server, so flush all of the file's data out of the cache.
1012 * Then force a getattr rpc to ensure that you have up to date
1014 * NB: This implies that cache data can be read when up to
1015 * NFS_MAXATTRTIMEO seconds out of date. If you find that you need
1016 * current attributes this could be forced by setting n_xid to 0
1017 * before the VOP_GETATTR() call.
1019 if ((nmp
->nm_flag
& NFSMNT_NQNFS
) == 0) {
1020 if (np
->n_flag
& NMODIFIED
) {
1021 if (vp
->v_type
!= VREG
) {
1022 if (vp
->v_type
!= VDIR
)
1023 panic("nfs: bioread, not dir");
1025 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1027 FSDBG_BOT(514, vp
, 0xd1e0003, 0, error
);
1032 error
= VOP_GETATTR(vp
, &vattr
, cred
, p
);
1034 FSDBG_BOT(514, vp
, 0xd1e0004, 0, error
);
1037 np
->n_mtime
= vattr
.va_mtime
.tv_sec
;
1039 error
= VOP_GETATTR(vp
, &vattr
, cred
, p
);
1041 FSDBG_BOT(514, vp
, 0xd1e0005, 0, error
);
1044 if (np
->n_mtime
!= vattr
.va_mtime
.tv_sec
) {
1045 if (vp
->v_type
== VDIR
) {
1047 /* purge name cache entries */
1050 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1052 FSDBG_BOT(514, vp
, 0xd1e0006, 0, error
);
1055 np
->n_mtime
= vattr
.va_mtime
.tv_sec
;
1062 * Get a valid lease. If cached data is stale, flush it.
1064 if (nmp
->nm_flag
& NFSMNT_NQNFS
) {
1065 if (NQNFS_CKINVALID(vp
, np
, ND_READ
)) {
1067 error
= nqnfs_getlease(vp
, ND_READ
, cred
, p
);
1068 } while (error
== NQNFS_EXPIRED
);
1070 FSDBG_BOT(514, vp
, 0xd1e0007, 0, error
);
1073 if (np
->n_lrev
!= np
->n_brev
||
1074 (np
->n_flag
& NQNFSNONCACHE
) ||
1075 ((np
->n_flag
& NMODIFIED
) && vp
->v_type
== VDIR
)) {
1076 if (vp
->v_type
== VDIR
)
1078 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1080 FSDBG_BOT(514, vp
, 0xd1e0008, 0, error
);
1083 np
->n_brev
= np
->n_lrev
;
1085 } else if (vp
->v_type
== VDIR
&& (np
->n_flag
& NMODIFIED
)) {
1087 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1089 FSDBG_BOT(514, vp
, 0xd1e0009, 0, error
);
1094 if ((np
->n_flag
& NQNFSNONCACHE
) || (vp
->v_flag
& VNOCACHE_DATA
)) {
1095 if ((vp
->v_flag
& VNOCACHE_DATA
) &&
1096 (np
->n_dirtyblkhd
.lh_first
|| np
->n_cleanblkhd
.lh_first
)) {
1097 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1099 FSDBG_BOT(514, vp
, 0xd1e000a, 0, error
);
1103 switch (vp
->v_type
) {
1105 error
= nfs_readrpc(vp
, uio
, cred
);
1106 FSDBG_BOT(514, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1109 error
= nfs_readlinkrpc(vp
, uio
, cred
);
1110 FSDBG_BOT(514, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1115 printf(" NQNFSNONCACHE: type %x unexpected\n", vp
->v_type
);
1118 switch (vp
->v_type
) {
1120 lbn
= uio
->uio_offset
/ biosize
;
1123 * Copy directly from any cached pages without grabbing the bufs.
1125 if (uio
->uio_segflg
== UIO_USERSPACE
) {
1126 int io_resid
= uio
->uio_resid
;
1127 diff
= np
->n_size
- uio
->uio_offset
;
1128 if (diff
< io_resid
)
1131 error
= cluster_copy_ubc_data(vp
, uio
, &io_resid
, 0);
1133 FSDBG_BOT(514, vp
, uio
->uio_offset
, 0xcacefeed, error
);
1137 /* count any biocache reads that we just copied directly */
1138 if (lbn
!= uio
->uio_offset
/ biosize
) {
1139 nfsstats
.biocache_reads
+= (uio
->uio_offset
/ biosize
) - lbn
;
1140 FSDBG(514, vp
, 0xcacefeed, uio
->uio_offset
, error
);
1144 lbn
= uio
->uio_offset
/ biosize
;
1145 on
= uio
->uio_offset
% biosize
;
1148 * Start the read ahead(s), as required.
1150 if (nfs_numasync
> 0 && nmp
->nm_readahead
> 0) {
1151 for (nra
= 0; nra
< nmp
->nm_readahead
; nra
++) {
1152 rabn
= lbn
+ 1 + nra
;
1153 if (rabn
<= lastrabn
) {
1154 /* we've already (tried to) read this block */
1155 /* no need to try it again... */
1159 if ((off_t
)rabn
* biosize
>= np
->n_size
)
1161 /* check if block exists and is valid. */
1162 rabp
= nfs_buf_incore(vp
, rabn
);
1163 if (rabp
&& nfs_buf_upl_valid_range(rabp
, 0, rabp
->nb_bufsize
))
1165 rabp
= nfs_buf_get(vp
, rabn
, biosize
, p
, operation
);
1167 FSDBG_BOT(514, vp
, 0xd1e000b, 0, EINTR
);
1170 if (!ISSET(rabp
->nb_flags
, (NB_CACHE
|NB_DELWRI
))) {
1171 SET(rabp
->nb_flags
, (NB_READ
|NB_ASYNC
));
1172 if (nfs_asyncio(rabp
, cred
)) {
1173 SET(rabp
->nb_flags
, (NB_INVAL
|NB_ERROR
));
1174 rabp
->nb_error
= EIO
;
1175 nfs_buf_release(rabp
);
1178 nfs_buf_release(rabp
);
1182 if ((uio
->uio_resid
<= 0) || (uio
->uio_offset
>= np
->n_size
)) {
1183 FSDBG_BOT(514, vp
, uio
->uio_offset
, uio
->uio_resid
, 0xaaaaaaaa);
1187 nfsstats
.biocache_reads
++;
1190 * If the block is in the cache and has the required data
1191 * in a valid region, just copy it out.
1192 * Otherwise, get the block and write back/read in,
1197 n
= min((unsigned)(bufsize
- on
), uio
->uio_resid
);
1198 diff
= np
->n_size
- uio
->uio_offset
;
1202 bp
= nfs_buf_get(vp
, lbn
, bufsize
, p
, operation
);
1204 FSDBG_BOT(514, vp
, 0xd1e000c, 0, EINTR
);
1208 /* if any pages are valid... */
1210 /* ...check for any invalid pages in the read range */
1211 int pg
, firstpg
, lastpg
, dirtypg
;
1212 dirtypg
= firstpg
= lastpg
= -1;
1214 while (pg
<= (on
+ n
- 1)/PAGE_SIZE
) {
1215 if (!NBPGVALID(bp
,pg
)) {
1219 } else if (firstpg
>= 0 && dirtypg
< 0 && NBPGDIRTY(bp
,pg
))
1224 /* if there are no invalid pages, we're all set */
1226 if (bp
->nb_validoff
< 0) {
1227 /* valid range isn't set up, so */
1228 /* set it to what we know is valid */
1229 bp
->nb_validoff
= trunc_page_32(on
);
1230 bp
->nb_validend
= round_page_32(on
+n
);
1231 nfs_buf_normalize_valid_range(np
, bp
);
1236 /* there are invalid pages in the read range */
1237 if ((dirtypg
> firstpg
) && (dirtypg
< lastpg
)) {
1238 /* there are also dirty page(s) in the range, */
1239 /* so write the buffer out and try again */
1240 CLR(bp
->nb_flags
, (NB_DONE
| NB_ERROR
| NB_INVAL
));
1241 SET(bp
->nb_flags
, NB_ASYNC
);
1243 * NFS has embedded ucred so crhold() risks zone corruption
1245 if (bp
->nb_wcred
== NOCRED
)
1246 bp
->nb_wcred
= crdup(cred
);
1247 error
= nfs_buf_write(bp
);
1249 FSDBG_BOT(514, vp
, 0xd1e000d, 0, error
);
1254 if (!bp
->nb_dirty
&& bp
->nb_dirtyend
<= 0 &&
1255 (lastpg
- firstpg
+ 1) > (bufsize
/PAGE_SIZE
)/2) {
1256 /* we need to read in more than half the buffer and the */
1257 /* buffer's not dirty, so just fetch the whole buffer */
1260 /* read the page range in */
1263 auio
.uio_iov
= &iov
;
1264 auio
.uio_iovcnt
= 1;
1265 auio
.uio_offset
= NBOFF(bp
) + firstpg
* PAGE_SIZE_64
;
1266 auio
.uio_resid
= (lastpg
- firstpg
+ 1) * PAGE_SIZE
;
1267 auio
.uio_segflg
= UIO_SYSSPACE
;
1268 auio
.uio_rw
= UIO_READ
;
1271 iov
.iov_base
= bp
->nb_data
+ firstpg
* PAGE_SIZE
;
1272 iov
.iov_len
= auio
.uio_resid
;
1273 error
= nfs_readrpc(vp
, &auio
, cred
);
1275 nfs_buf_release(bp
);
1276 FSDBG_BOT(514, vp
, 0xd1e000e, 0, error
);
1279 /* Make sure that the valid range is set to cover this read. */
1280 bp
->nb_validoff
= trunc_page_32(on
);
1281 bp
->nb_validend
= round_page_32(on
+n
);
1282 nfs_buf_normalize_valid_range(np
, bp
);
1283 if (auio
.uio_resid
> 0) {
1284 /* if short read, must have hit EOF, */
1285 /* so zero the rest of the range */
1286 bzero(iov
.iov_base
, auio
.uio_resid
);
1288 /* mark the pages (successfully read) as valid */
1289 for (pg
=firstpg
; pg
<= lastpg
; pg
++)
1290 NBPGVALID_SET(bp
,pg
);
1293 /* if no pages are valid, read the whole block */
1294 if (!bp
->nb_valid
) {
1295 SET(bp
->nb_flags
, NB_READ
);
1296 CLR(bp
->nb_flags
, (NB_DONE
| NB_ERROR
| NB_INVAL
));
1297 error
= nfs_doio(bp
, cred
, p
);
1299 nfs_buf_release(bp
);
1300 FSDBG_BOT(514, vp
, 0xd1e000f, 0, error
);
1306 /* validate read range against valid range and clip */
1307 if (bp
->nb_validend
> 0) {
1308 diff
= (on
>= bp
->nb_validend
) ? 0 : (bp
->nb_validend
- on
);
1316 nfsstats
.biocache_readlinks
++;
1317 bp
= nfs_buf_get(vp
, (daddr_t
)0, NFS_MAXPATHLEN
, p
, operation
);
1319 FSDBG_BOT(514, vp
, 0xd1e0010, 0, EINTR
);
1322 if (!ISSET(bp
->nb_flags
, NB_CACHE
)) {
1323 SET(bp
->nb_flags
, NB_READ
);
1324 error
= nfs_doio(bp
, cred
, p
);
1326 SET(bp
->nb_flags
, NB_ERROR
);
1327 nfs_buf_release(bp
);
1328 FSDBG_BOT(514, vp
, 0xd1e0011, 0, error
);
1332 n
= min(uio
->uio_resid
, bp
->nb_validend
);
1336 nfsstats
.biocache_readdirs
++;
1337 if (np
->n_direofoffset
&& uio
->uio_offset
>= np
->n_direofoffset
) {
1338 FSDBG_BOT(514, vp
, 0xde0f0001, 0, 0);
1341 lbn
= uio
->uio_offset
/ NFS_DIRBLKSIZ
;
1342 on
= uio
->uio_offset
& (NFS_DIRBLKSIZ
- 1);
1343 bp
= nfs_buf_get(vp
, lbn
, NFS_DIRBLKSIZ
, p
, operation
);
1345 FSDBG_BOT(514, vp
, 0xd1e0012, 0, EINTR
);
1348 if (!ISSET(bp
->nb_flags
, NB_CACHE
)) {
1349 SET(bp
->nb_flags
, NB_READ
);
1350 error
= nfs_doio(bp
, cred
, p
);
1352 nfs_buf_release(bp
);
1354 while (error
== NFSERR_BAD_COOKIE
) {
1356 error
= nfs_vinvalbuf(vp
, 0, cred
, p
, 1);
1358 * Yuck! The directory has been modified on the
1359 * server. The only way to get the block is by
1360 * reading from the beginning to get all the
1363 for (i
= 0; i
<= lbn
&& !error
; i
++) {
1364 if (np
->n_direofoffset
1365 && (i
* NFS_DIRBLKSIZ
) >= np
->n_direofoffset
) {
1366 FSDBG_BOT(514, vp
, 0xde0f0002, 0, 0);
1369 bp
= nfs_buf_get(vp
, i
, NFS_DIRBLKSIZ
, p
, operation
);
1371 FSDBG_BOT(514, vp
, 0xd1e0013, 0, EINTR
);
1374 if (!ISSET(bp
->nb_flags
, NB_CACHE
)) {
1375 SET(bp
->nb_flags
, NB_READ
);
1376 error
= nfs_doio(bp
, cred
, p
);
1378 * no error + NB_INVAL == directory EOF,
1381 if (error
== 0 && (bp
->nb_flags
& NB_INVAL
))
1385 * An error will throw away the block and the
1386 * for loop will break out. If no error and this
1387 * is not the block we want, we throw away the
1388 * block and go for the next one via the for loop.
1390 if (error
|| i
< lbn
)
1391 nfs_buf_release(bp
);
1395 * The above while is repeated if we hit another cookie
1396 * error. If we hit an error and it wasn't a cookie error,
1400 FSDBG_BOT(514, vp
, 0xd1e0014, 0, error
);
1406 * If not eof and read aheads are enabled, start one.
1407 * (You need the current block first, so that you have the
1408 * directory offset cookie of the next block.)
1410 if (nfs_numasync
> 0 && nmp
->nm_readahead
> 0 &&
1411 (np
->n_direofoffset
== 0 ||
1412 (lbn
+ 1) * NFS_DIRBLKSIZ
< np
->n_direofoffset
) &&
1413 !(np
->n_flag
& NQNFSNONCACHE
) &&
1414 !nfs_buf_incore(vp
, lbn
+ 1)) {
1415 rabp
= nfs_buf_get(vp
, lbn
+ 1, NFS_DIRBLKSIZ
, p
,
1418 if (!ISSET(rabp
->nb_flags
, (NB_CACHE
))) {
1419 SET(rabp
->nb_flags
, (NB_READ
| NB_ASYNC
));
1420 if (nfs_asyncio(rabp
, cred
)) {
1421 SET(rabp
->nb_flags
, (NB_INVAL
|NB_ERROR
));
1422 rabp
->nb_error
= EIO
;
1423 nfs_buf_release(rabp
);
1426 nfs_buf_release(rabp
);
1431 * Make sure we use a signed variant of min() since
1432 * the second term may be negative.
1434 n
= lmin(uio
->uio_resid
, bp
->nb_validend
- on
);
1436 * We keep track of the directory eof in
1437 * np->n_direofoffset and chop it off as an
1438 * extra step right here.
1440 if (np
->n_direofoffset
&&
1441 n
> np
->n_direofoffset
- uio
->uio_offset
)
1442 n
= np
->n_direofoffset
- uio
->uio_offset
;
1444 * Make sure that we return an integral number of entries so
1445 * that any subsequent calls will start copying from the start
1446 * of the next entry.
1448 * If the current value of n has the last entry cut short,
1449 * set n to copy everything up to the last entry instead.
1452 dp
= bp
->nb_data
+ on
;
1453 while (dp
< (bp
->nb_data
+ on
+ n
)) {
1454 direntp
= (struct dirent
*)dp
;
1455 dp
+= direntp
->d_reclen
;
1457 if (dp
> (bp
->nb_data
+ on
+ n
))
1458 n
= (dp
- direntp
->d_reclen
) - (bp
->nb_data
+ on
);
1462 printf("nfs_bioread: type %x unexpected\n",vp
->v_type
);
1463 FSDBG_BOT(514, vp
, 0xd1e0015, 0, EINVAL
);
1468 error
= uiomove(bp
->nb_data
+ on
, (int)n
, uio
);
1470 switch (vp
->v_type
) {
1477 if (np
->n_flag
& NQNFSNONCACHE
)
1478 SET(bp
->nb_flags
, NB_INVAL
);
1481 nfs_buf_release(bp
);
1482 } while (error
== 0 && uio
->uio_resid
> 0 && n
> 0);
1483 FSDBG_BOT(514, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1489 * Vnode op for write using bio
1493 struct vop_write_args
/* {
1497 struct ucred *a_cred;
1500 struct uio
*uio
= ap
->a_uio
;
1501 struct proc
*p
= uio
->uio_procp
;
1502 struct vnode
*vp
= ap
->a_vp
;
1503 struct nfsnode
*np
= VTONFS(vp
);
1504 struct ucred
*cred
= ap
->a_cred
;
1505 int ioflag
= ap
->a_ioflag
;
1508 struct nfsmount
*nmp
= VFSTONFS(vp
->v_mount
);
1510 int biosize
, bufsize
, writeop
;
1511 int n
, on
, error
= 0, iomode
, must_commit
;
1512 off_t boff
, start
, end
;
1516 FSDBG_TOP(515, vp
, uio
->uio_offset
, uio
->uio_resid
, ioflag
);
1519 if (uio
->uio_rw
!= UIO_WRITE
)
1520 panic("nfs_write mode");
1521 if (uio
->uio_segflg
== UIO_USERSPACE
&& uio
->uio_procp
!= current_proc())
1522 panic("nfs_write proc");
1524 if (vp
->v_type
!= VREG
)
1526 if (np
->n_flag
& NWRITEERR
) {
1527 np
->n_flag
&= ~NWRITEERR
;
1528 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, np
->n_error
);
1529 return (np
->n_error
);
1531 if ((nmp
->nm_flag
& NFSMNT_NFSV3
) &&
1532 !(nmp
->nm_state
& NFSSTA_GOTFSINFO
))
1533 (void)nfs_fsinfo(nmp
, vp
, cred
, p
);
1534 if (ioflag
& (IO_APPEND
| IO_SYNC
)) {
1535 if (np
->n_flag
& NMODIFIED
) {
1537 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1539 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x10bad01, error
);
1543 if (ioflag
& IO_APPEND
) {
1545 error
= VOP_GETATTR(vp
, &vattr
, cred
, p
);
1547 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x10bad02, error
);
1550 uio
->uio_offset
= np
->n_size
;
1553 if (uio
->uio_offset
< 0) {
1554 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0xbad0ff, EINVAL
);
1557 if (uio
->uio_resid
== 0) {
1558 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, 0);
1562 * Maybe this should be above the vnode op call, but so long as
1563 * file servers have no limits, i don't think it matters
1565 if (p
&& uio
->uio_offset
+ uio
->uio_resid
>
1566 p
->p_rlimit
[RLIMIT_FSIZE
].rlim_cur
) {
1567 psignal(p
, SIGXFSZ
);
1568 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x2b1f, EFBIG
);
1572 biosize
= vp
->v_mount
->mnt_stat
.f_iosize
;
1576 * Check for a valid write lease.
1578 if ((nmp
->nm_flag
& NFSMNT_NQNFS
) &&
1579 NQNFS_CKINVALID(vp
, np
, ND_WRITE
)) {
1581 error
= nqnfs_getlease(vp
, ND_WRITE
, cred
, p
);
1582 } while (error
== NQNFS_EXPIRED
);
1584 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x11110001, error
);
1587 if (np
->n_lrev
!= np
->n_brev
||
1588 (np
->n_flag
& NQNFSNONCACHE
)) {
1589 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1591 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x11110002, error
);
1594 np
->n_brev
= np
->n_lrev
;
1597 if (ISSET(vp
->v_flag
, VNOCACHE_DATA
) &&
1598 (np
->n_dirtyblkhd
.lh_first
|| np
->n_cleanblkhd
.lh_first
)) {
1599 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1601 FSDBG_BOT(515, vp
, 0, 0, error
);
1605 if (((np
->n_flag
& NQNFSNONCACHE
) ||
1606 ISSET(vp
->v_flag
, VNOCACHE_DATA
)) &&
1607 uio
->uio_iovcnt
== 1) {
1608 iomode
= NFSV3WRITE_FILESYNC
;
1609 error
= nfs_writerpc(vp
, uio
, cred
, &iomode
, &must_commit
);
1611 nfs_clearcommit(vp
->v_mount
);
1612 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1615 nfsstats
.biocache_writes
++;
1616 lbn
= uio
->uio_offset
/ biosize
;
1617 on
= uio
->uio_offset
% biosize
;
1618 n
= min((unsigned)(biosize
- on
), uio
->uio_resid
);
1622 * Get a cache block for writing. The range to be written is
1623 * (off..off+n) within the block. We ensure that the block
1624 * either has no dirty region or that the given range is
1625 * contiguous with the existing dirty region.
1627 bp
= nfs_buf_get(vp
, lbn
, bufsize
, p
, BLK_WRITE
);
1629 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, EINTR
);
1632 /* map the block because we know we're going to write to it */
1635 if (ISSET(vp
->v_flag
, VNOCACHE_DATA
))
1636 SET(bp
->nb_flags
, (NB_NOCACHE
|NB_INVAL
));
1639 * NFS has embedded ucred so crhold() risks zone corruption
1641 if (bp
->nb_wcred
== NOCRED
)
1642 bp
->nb_wcred
= crdup(cred
);
1645 * If there's already a dirty range AND dirty pages in this block we
1646 * need to send a commit AND write the dirty pages before continuing.
1648 * If there's already a dirty range OR dirty pages in this block
1649 * and the new write range is not contiguous with the existing range,
1650 * then force the buffer to be written out now.
1651 * (We used to just extend the dirty range to cover the valid,
1652 * but unwritten, data in between also. But writing ranges
1653 * of data that weren't actually written by an application
1654 * risks overwriting some other client's data with stale data
1655 * that's just masquerading as new written data.)
1657 if (bp
->nb_dirtyend
> 0) {
1658 if (on
> bp
->nb_dirtyend
|| (on
+ n
) < bp
->nb_dirtyoff
|| bp
->nb_dirty
) {
1659 FSDBG(515, vp
, uio
->uio_offset
, bp
, 0xd15c001);
1660 /* write/commit buffer "synchronously" */
1661 /* (NB_STABLE indicates that data writes should be FILESYNC) */
1662 CLR(bp
->nb_flags
, (NB_DONE
| NB_ERROR
| NB_INVAL
));
1663 SET(bp
->nb_flags
, (NB_ASYNC
| NB_STABLE
));
1664 error
= nfs_buf_write(bp
);
1666 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1671 } else if (bp
->nb_dirty
) {
1672 int firstpg
, lastpg
;
1674 /* calculate write range pagemask */
1675 firstpg
= on
/PAGE_SIZE
;
1676 lastpg
= (on
+n
-1)/PAGE_SIZE
;
1677 pagemask
= ((1 << (lastpg
+1)) - 1) & ~((1 << firstpg
) - 1);
1678 /* check if there are dirty pages outside the write range */
1679 if (bp
->nb_dirty
& ~pagemask
) {
1680 FSDBG(515, vp
, uio
->uio_offset
, bp
, 0xd15c002);
1681 /* write/commit buffer "synchronously" */
1682 /* (NB_STABLE indicates that data writes should be FILESYNC) */
1683 CLR(bp
->nb_flags
, (NB_DONE
| NB_ERROR
| NB_INVAL
));
1684 SET(bp
->nb_flags
, (NB_ASYNC
| NB_STABLE
));
1685 error
= nfs_buf_write(bp
);
1687 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1692 /* if the first or last pages are already dirty */
1693 /* make sure that the dirty range encompasses those pages */
1694 if (NBPGDIRTY(bp
,firstpg
) || NBPGDIRTY(bp
,lastpg
)) {
1695 FSDBG(515, vp
, uio
->uio_offset
, bp
, 0xd15c003);
1696 bp
->nb_dirtyoff
= min(on
, firstpg
* PAGE_SIZE
);
1697 if (NBPGDIRTY(bp
,lastpg
)) {
1698 bp
->nb_dirtyend
= (lastpg
+1) * PAGE_SIZE
;
1700 if (NBOFF(bp
) + bp
->nb_dirtyend
> np
->n_size
)
1701 bp
->nb_dirtyend
= np
->n_size
- NBOFF(bp
);
1703 bp
->nb_dirtyend
= on
+n
;
1708 * Are we extending the size of the file with this write?
1709 * If so, update file size now that we have the block.
1710 * If there was a partial buf at the old eof, validate
1711 * and zero the new bytes.
1713 if (uio
->uio_offset
+ n
> np
->n_size
) {
1714 struct nfsbuf
*eofbp
= NULL
;
1715 daddr_t eofbn
= np
->n_size
/ biosize
;
1716 int eofoff
= np
->n_size
% biosize
;
1717 int neweofoff
= (uio
->uio_offset
+ n
) % biosize
;
1719 FSDBG(515, 0xb1ffa000, uio
->uio_offset
+ n
, eofoff
, neweofoff
);
1721 if (eofoff
&& eofbn
< lbn
&& nfs_buf_incore(vp
, eofbn
))
1722 eofbp
= nfs_buf_get(vp
, eofbn
, biosize
, p
, BLK_WRITE
);
1724 /* if we're extending within the same last block */
1725 /* and the block is flagged as being cached... */
1726 if ((lbn
== eofbn
) && ISSET(bp
->nb_flags
, NB_CACHE
)) {
1727 /* ...check that all pages in buffer are valid */
1728 int endpg
= ((neweofoff
? neweofoff
: biosize
) - 1)/PAGE_SIZE
;
1730 /* pagemask only has to extend to last page being written to */
1731 pagemask
= (1 << (endpg
+1)) - 1;
1732 FSDBG(515, 0xb1ffa001, bp
->nb_valid
, pagemask
, 0);
1733 if ((bp
->nb_valid
& pagemask
) != pagemask
) {
1734 /* zerofill any hole */
1735 if (on
> bp
->nb_validend
) {
1737 for (i
=bp
->nb_validend
/PAGE_SIZE
; i
<= (on
- 1)/PAGE_SIZE
; i
++)
1738 NBPGVALID_SET(bp
, i
);
1740 FSDBG(516, bp
, bp
->nb_validend
, on
- bp
->nb_validend
, 0xf01e);
1741 bzero((char *)bp
->nb_data
+ bp
->nb_validend
,
1742 on
- bp
->nb_validend
);
1744 /* zerofill any trailing data in the last page */
1747 FSDBG(516, bp
, neweofoff
, PAGE_SIZE
- (neweofoff
& PAGE_MASK
), 0xe0f);
1748 bzero((char *)bp
->nb_data
+ neweofoff
,
1749 PAGE_SIZE
- (neweofoff
& PAGE_MASK
));
1753 np
->n_flag
|= NMODIFIED
;
1754 np
->n_size
= uio
->uio_offset
+ n
;
1755 ubc_setsize(vp
, (off_t
)np
->n_size
); /* XXX errors */
1758 * We may need to zero any previously invalid data
1759 * after the old EOF in the previous EOF buffer.
1761 * For the old last page, don't zero bytes if there
1762 * are invalid bytes in that page (i.e. the page isn't
1764 * For pages after the old last page, zero them and
1765 * mark them as valid.
1769 if (ISSET(vp
->v_flag
, VNOCACHE_DATA
))
1770 SET(eofbp
->nb_flags
, (NB_NOCACHE
|NB_INVAL
));
1772 FSDBG(516, eofbp
, eofoff
, biosize
- eofoff
, 0xe0fff01e);
1774 i
= eofoff
/PAGE_SIZE
;
1775 while (eofoff
< biosize
) {
1776 int poff
= eofoff
& PAGE_MASK
;
1777 if (!poff
|| NBPGVALID(eofbp
,i
)) {
1778 bzero(d
+ eofoff
, PAGE_SIZE
- poff
);
1779 NBPGVALID_SET(eofbp
, i
);
1781 if (bp
->nb_validend
== eofoff
)
1782 bp
->nb_validend
+= PAGE_SIZE
- poff
;
1783 eofoff
+= PAGE_SIZE
- poff
;
1786 nfs_buf_release(eofbp
);
1790 * If dirtyend exceeds file size, chop it down. This should
1791 * not occur unless there is a race.
1793 if (NBOFF(bp
) + bp
->nb_dirtyend
> np
->n_size
)
1794 bp
->nb_dirtyend
= np
->n_size
- NBOFF(bp
);
1796 * UBC doesn't handle partial pages, so we need to make sure
1797 * that any pages left in the page cache are completely valid.
1799 * Writes that are smaller than a block are delayed if they
1800 * don't extend to the end of the block.
1802 * If the block isn't (completely) cached, we may need to read
1803 * in some parts of pages that aren't covered by the write.
1804 * If the write offset (on) isn't page aligned, we'll need to
1805 * read the start of the first page being written to. Likewise,
1806 * if the offset of the end of the write (on+n) isn't page aligned,
1807 * we'll need to read the end of the last page being written to.
1810 * We don't want to read anything we're just going to write over.
1811 * We don't want to issue multiple I/Os if we don't have to
1812 * (because they're synchronous rpcs).
1813 * We don't want to read anything we already have modified in the
1816 if (!ISSET(bp
->nb_flags
, NB_CACHE
) && n
< biosize
) {
1817 int firstpg
, lastpg
, dirtypg
;
1818 int firstpgoff
, lastpgoff
;
1820 firstpg
= on
/PAGE_SIZE
;
1821 firstpgoff
= on
& PAGE_MASK
;
1822 lastpg
= (on
+n
-1)/PAGE_SIZE
;
1823 lastpgoff
= (on
+n
) & PAGE_MASK
;
1824 if (firstpgoff
&& !NBPGVALID(bp
,firstpg
)) {
1825 /* need to read start of first page */
1826 start
= firstpg
* PAGE_SIZE
;
1827 end
= start
+ firstpgoff
;
1829 if (lastpgoff
&& !NBPGVALID(bp
,lastpg
)) {
1830 /* need to read end of last page */
1832 start
= (lastpg
* PAGE_SIZE
) + lastpgoff
;
1833 end
= (lastpg
+ 1) * PAGE_SIZE
;
1836 /* need to read the data in range: start...end-1 */
1839 * XXX: If we know any of these reads are beyond the
1840 * current EOF (what np->n_size was before we possibly
1841 * just modified it above), we could short-circuit the
1842 * reads and just zero buffer. No need to make a trip
1843 * across the network to read nothing.
1846 /* first, check for dirty pages in between */
1847 /* if there are, we'll have to do two reads because */
1848 /* we don't want to overwrite the dirty pages. */
1849 for (dirtypg
=start
/PAGE_SIZE
; dirtypg
<= (end
-1)/PAGE_SIZE
; dirtypg
++)
1850 if (NBPGDIRTY(bp
,dirtypg
))
1853 /* if start is at beginning of page, try */
1854 /* to get any preceeding pages as well. */
1855 if (!(start
& PAGE_MASK
)) {
1856 /* stop at next dirty/valid page or start of block */
1857 for (; start
> 0; start
-=PAGE_SIZE
)
1858 if (NBPGVALID(bp
,((start
-1)/PAGE_SIZE
)))
1863 /* setup uio for read(s) */
1865 auio
.uio_iov
= &iov
;
1866 auio
.uio_iovcnt
= 1;
1867 auio
.uio_segflg
= UIO_SYSSPACE
;
1868 auio
.uio_rw
= UIO_READ
;
1871 if (dirtypg
<= (end
-1)/PAGE_SIZE
) {
1872 /* there's a dirty page in the way, so just do two reads */
1873 /* we'll read the preceding data here */
1874 auio
.uio_offset
= boff
+ start
;
1875 auio
.uio_resid
= iov
.iov_len
= on
- start
;
1876 iov
.iov_base
= bp
->nb_data
+ start
;
1877 error
= nfs_readrpc(vp
, &auio
, cred
);
1879 bp
->nb_error
= error
;
1880 SET(bp
->nb_flags
, NB_ERROR
);
1881 printf("nfs_write: readrpc %d", error
);
1883 if (auio
.uio_resid
> 0) {
1884 FSDBG(516, bp
, iov
.iov_base
- bp
->nb_data
, auio
.uio_resid
, 0xd00dee01);
1885 bzero(iov
.iov_base
, auio
.uio_resid
);
1887 /* update validoff/validend if necessary */
1888 if ((bp
->nb_validoff
< 0) || (bp
->nb_validoff
> start
))
1889 bp
->nb_validoff
= start
;
1890 if ((bp
->nb_validend
< 0) || (bp
->nb_validend
< on
))
1891 bp
->nb_validend
= on
;
1892 if (np
->n_size
> boff
+ bp
->nb_validend
)
1893 bp
->nb_validend
= min(np
->n_size
- (boff
+ start
), biosize
);
1894 /* validate any pages before the write offset */
1895 for (; start
< on
/PAGE_SIZE
; start
+=PAGE_SIZE
)
1896 NBPGVALID_SET(bp
, start
/PAGE_SIZE
);
1897 /* adjust start to read any trailing data */
1901 /* if end is at end of page, try to */
1902 /* get any following pages as well. */
1903 if (!(end
& PAGE_MASK
)) {
1904 /* stop at next valid page or end of block */
1905 for (; end
< bufsize
; end
+=PAGE_SIZE
)
1906 if (NBPGVALID(bp
,end
/PAGE_SIZE
))
1910 /* now we'll read the (rest of the) data */
1911 auio
.uio_offset
= boff
+ start
;
1912 auio
.uio_resid
= iov
.iov_len
= end
- start
;
1913 iov
.iov_base
= bp
->nb_data
+ start
;
1914 error
= nfs_readrpc(vp
, &auio
, cred
);
1916 bp
->nb_error
= error
;
1917 SET(bp
->nb_flags
, NB_ERROR
);
1918 printf("nfs_write: readrpc %d", error
);
1920 if (auio
.uio_resid
> 0) {
1921 FSDBG(516, bp
, iov
.iov_base
- bp
->nb_data
, auio
.uio_resid
, 0xd00dee02);
1922 bzero(iov
.iov_base
, auio
.uio_resid
);
1924 /* update validoff/validend if necessary */
1925 if ((bp
->nb_validoff
< 0) || (bp
->nb_validoff
> start
))
1926 bp
->nb_validoff
= start
;
1927 if ((bp
->nb_validend
< 0) || (bp
->nb_validend
< end
))
1928 bp
->nb_validend
= end
;
1929 if (np
->n_size
> boff
+ bp
->nb_validend
)
1930 bp
->nb_validend
= min(np
->n_size
- (boff
+ start
), biosize
);
1931 /* validate any pages before the write offset's page */
1932 for (; start
< trunc_page_32(on
); start
+=PAGE_SIZE
)
1933 NBPGVALID_SET(bp
, start
/PAGE_SIZE
);
1934 /* validate any pages after the range of pages being written to */
1935 for (; (end
- 1) > round_page_32(on
+n
-1); end
-=PAGE_SIZE
)
1936 NBPGVALID_SET(bp
, (end
-1)/PAGE_SIZE
);
1937 /* Note: pages being written to will be validated when written */
1941 if (ISSET(bp
->nb_flags
, NB_ERROR
)) {
1942 error
= bp
->nb_error
;
1943 nfs_buf_release(bp
);
1944 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1948 np
->n_flag
|= NMODIFIED
;
1951 * Check for valid write lease and get one as required.
1952 * In case nfs_buf_get() and/or nfs_buf_write() delayed us.
1954 if ((nmp
->nm_flag
& NFSMNT_NQNFS
) &&
1955 NQNFS_CKINVALID(vp
, np
, ND_WRITE
)) {
1957 error
= nqnfs_getlease(vp
, ND_WRITE
, cred
, p
);
1958 } while (error
== NQNFS_EXPIRED
);
1960 nfs_buf_release(bp
);
1961 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x11220001, error
);
1964 if (np
->n_lrev
!= np
->n_brev
||
1965 (np
->n_flag
& NQNFSNONCACHE
)) {
1966 nfs_buf_release(bp
);
1967 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
1969 FSDBG_BOT(515, vp
, uio
->uio_offset
, 0x11220002, error
);
1972 np
->n_brev
= np
->n_lrev
;
1977 error
= uiomove((char *)bp
->nb_data
+ on
, n
, uio
);
1979 SET(bp
->nb_flags
, NB_ERROR
);
1980 nfs_buf_release(bp
);
1981 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, error
);
1985 /* validate any pages written to */
1986 start
= on
& ~PAGE_MASK
;
1987 for (; start
< on
+n
; start
+= PAGE_SIZE
) {
1988 NBPGVALID_SET(bp
, start
/PAGE_SIZE
);
1990 * This may seem a little weird, but we don't actually set the
1991 * dirty bits for writes. This is because we keep the dirty range
1992 * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
1993 * delayed writes, when we give the pages back to the VM we don't
1994 * want to keep them marked dirty, because when we later write the
1995 * buffer we won't be able to tell which pages were written dirty
1996 * and which pages were mmapped and dirtied.
1999 if (bp
->nb_dirtyend
> 0) {
2000 bp
->nb_dirtyoff
= min(on
, bp
->nb_dirtyoff
);
2001 bp
->nb_dirtyend
= max((on
+ n
), bp
->nb_dirtyend
);
2003 bp
->nb_dirtyoff
= on
;
2004 bp
->nb_dirtyend
= on
+ n
;
2006 if (bp
->nb_validend
<= 0 || bp
->nb_validend
< bp
->nb_dirtyoff
||
2007 bp
->nb_validoff
> bp
->nb_dirtyend
) {
2008 bp
->nb_validoff
= bp
->nb_dirtyoff
;
2009 bp
->nb_validend
= bp
->nb_dirtyend
;
2011 bp
->nb_validoff
= min(bp
->nb_validoff
, bp
->nb_dirtyoff
);
2012 bp
->nb_validend
= max(bp
->nb_validend
, bp
->nb_dirtyend
);
2014 if (!ISSET(bp
->nb_flags
, NB_CACHE
))
2015 nfs_buf_normalize_valid_range(np
, bp
);
2018 * Since this block is being modified, it must be written
2019 * again and not just committed.
2021 if (ISSET(bp
->nb_flags
, NB_NEEDCOMMIT
)) {
2022 np
->n_needcommitcnt
--;
2023 CHECK_NEEDCOMMITCNT(np
);
2025 CLR(bp
->nb_flags
, NB_NEEDCOMMIT
);
2027 if ((np
->n_flag
& NQNFSNONCACHE
) ||
2028 (ioflag
& IO_SYNC
) || (vp
->v_flag
& VNOCACHE_DATA
)) {
2030 error
= nfs_buf_write(bp
);
2032 FSDBG_BOT(515, vp
, uio
->uio_offset
,
2033 uio
->uio_resid
, error
);
2036 if (np
->n_flag
& NQNFSNONCACHE
) {
2037 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
2039 FSDBG_BOT(515, vp
, uio
->uio_offset
,
2040 uio
->uio_resid
, error
);
2044 } else if ((n
+ on
) == biosize
&& (nmp
->nm_flag
& NFSMNT_NQNFS
) == 0) {
2045 bp
->nb_proc
= (struct proc
*)0;
2046 SET(bp
->nb_flags
, NB_ASYNC
);
2049 nfs_buf_write_delayed(bp
);
2051 if (np
->n_needcommitcnt
> (nbuf
/16))
2052 nfs_flushcommits(vp
, p
);
2054 } while (uio
->uio_resid
> 0 && n
> 0);
2056 FSDBG_BOT(515, vp
, uio
->uio_offset
, uio
->uio_resid
, 0);
2061 * Flush out and invalidate all buffers associated with a vnode.
2062 * Called with the underlying object locked.
2065 nfs_vinvalbuf_internal(vp
, flags
, cred
, p
, slpflag
, slptimeo
)
2066 register struct vnode
*vp
;
2070 int slpflag
, slptimeo
;
2073 struct nfsbuf
*nbp
, *blist
;
2075 struct nfsnode
*np
= VTONFS(vp
);
2077 if (flags
& V_SAVE
) {
2078 if (error
= VOP_FSYNC(vp
, cred
, MNT_WAIT
, p
))
2080 if (np
->n_dirtyblkhd
.lh_first
)
2081 panic("nfs_vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)",
2082 vp
, np
->n_dirtyblkhd
.lh_first
);
2086 blist
= np
->n_cleanblkhd
.lh_first
;
2088 blist
= np
->n_dirtyblkhd
.lh_first
;
2092 for (bp
= blist
; bp
; bp
= nbp
) {
2093 nbp
= bp
->nb_vnbufs
.le_next
;
2095 if (ISSET(bp
->nb_flags
, NB_BUSY
)) {
2096 SET(bp
->nb_flags
, NB_WANTED
);
2097 FSDBG_TOP(556, vp
, bp
, NBOFF(bp
), bp
->nb_flags
);
2098 error
= tsleep((caddr_t
)bp
,
2099 slpflag
| (PRIBIO
+ 1), "nfs_vinvalbuf",
2101 FSDBG_BOT(556, vp
, bp
, NBOFF(bp
), bp
->nb_flags
);
2104 FSDBG(554, vp
, bp
, -1, error
);
2109 FSDBG(554, vp
, bp
, NBOFF(bp
), bp
->nb_flags
);
2110 nfs_buf_remfree(bp
);
2111 SET(bp
->nb_flags
, NB_BUSY
);
2113 if ((flags
& V_SAVE
) && UBCINFOEXISTS(vp
) && (NBOFF(bp
) < np
->n_size
)) {
2114 /* XXX extra paranoia: make sure we're not */
2115 /* somehow leaving any dirty data around */
2117 int end
= (NBOFF(bp
) + bp
->nb_bufsize
>= np
->n_size
) ?
2118 bp
->nb_bufsize
: (np
->n_size
- NBOFF(bp
));
2119 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
)) {
2120 error
= nfs_buf_upl_setup(bp
);
2121 if (error
== EINVAL
) {
2122 /* vm object must no longer exist */
2123 /* hopefully we don't need to do */
2124 /* anything for this buffer */
2126 printf("nfs_vinvalbuf: upl setup failed %d\n",
2128 bp
->nb_valid
= bp
->nb_dirty
= 0;
2130 nfs_buf_upl_check(bp
);
2131 /* check for any dirty data before the EOF */
2132 if (bp
->nb_dirtyend
&& bp
->nb_dirtyoff
< end
) {
2133 /* clip dirty range to EOF */
2134 if (bp
->nb_dirtyend
> end
)
2135 bp
->nb_dirtyend
= end
;
2138 bp
->nb_dirty
&= (1 << (round_page_32(end
)/PAGE_SIZE
)) - 1;
				if (bp->nb_dirty)
					mustwrite++;
				if (mustwrite) {
					FSDBG(554, vp, bp, 0xd00dee, bp->nb_flags);
					if (!ISSET(bp->nb_flags, NB_PAGELIST))
						panic("nfs_vinvalbuf: dirty buffer without upl");
					/* gotta write out dirty data before invalidating */
					/* (NB_STABLE indicates that data writes should be FILESYNC) */
					/* (NB_NOCACHE indicates buffer should be discarded) */
					CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC));
					SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
					/*
					 * NFS has embedded ucred so crhold() risks zone corruption
					 */
					if (bp->nb_wcred == NOCRED)
						bp->nb_wcred = crdup(cred);
					error = nfs_buf_write(bp);
					// Note: bp has been released
					if (error) {
						FSDBG(554, bp, 0xd00dee, 0xbad, error);
						np->n_error = error;
						np->n_flag |= NWRITEERR;
					}
					break;
				}
			}
			SET(bp->nb_flags, NB_INVAL);
			nfs_buf_release(bp);
		}
	}
	if (np->n_dirtyblkhd.lh_first || np->n_cleanblkhd.lh_first)
		panic("nfs_vinvalbuf: flush failed");
	return (0);
}
/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;
	int didhold = 0;

	FSDBG_TOP(554, vp, flags, intrflg, 0);

	if (nmp && ((nmp->nm_flag & NFSMNT_INT) == 0))
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
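	/*
	 * slpflag/slptimeo govern the sleeps below: on an interruptible
	 * ("intr") mount we sleep with PCATCH and a finite timeout so a
	 * signal can break the wait, otherwise the sleeps are plain
	 * uninterruptible waits.
	 */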
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		FSDBG_TOP(555, vp, flags, intrflg, np->n_flag);
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo);
		FSDBG_BOT(555, vp, flags, intrflg, np->n_flag);
		if (error && (error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p))) {
			FSDBG_BOT(554, vp, flags, intrflg, error);
			return (error);
		}
	}
	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, 0);
	while (error) {
		FSDBG(554, vp, 0, 0, error);
		error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p);
		if (error) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			FSDBG_BOT(554, vp, flags, intrflg, error);
			return (error);
		}
		error = nfs_vinvalbuf_internal(vp, flags, cred, p, 0, slptimeo);
	}

	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}

	didhold = ubc_hold(vp);
	if (didhold) {
		int rv = ubc_clean(vp, 1); /* get the pages out of vm also */
		if (!rv)
			panic("nfs_vinvalbuf(): ubc_clean failed!");
		ubc_rele(vp);
	}

	FSDBG_BOT(554, vp, flags, intrflg, 0);
	return (0);
}
/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	struct nfsbuf *bp;
	struct ucred *cred;
{
	struct nfsmount *nmp;
	int i;
	int gotiod;
	int slpflag = 0;
	int slptimeo = 0;
	int error, error2;

	if (nfs_numasync == 0)
		return (EIO);

	FSDBG_TOP(552, bp, bp ? NBOFF(bp) : 0, bp ? bp->nb_flags : 0, 0);

	nmp = ((bp != NULL) ? VFSTONFS(bp->nb_vp->v_mount) : NULL);
again:
	if (nmp && nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/* no nfsbuf means tell nfsiod to process delwri list */
	if (!bp)
		nfs_ioddelwri = 1;

	/*
	 * Find a free iod to process this request.
	 */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waking iod %d for mount %p\n",
				 i, nmp));
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			if (nmp)
				nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}
	/* if we're just poking the delwri list, we're done */
	if (!bp)
		return (0);

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point.  If so, it will process our request.
	 */
	if (!gotiod) {
		if (nmp->nm_bufqiods > 0) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: %d iods are already processing mount %p\n",
				 nmp->nm_bufqiods, nmp));
			gotiod = TRUE;
		}
	}

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	FSDBG(552, bp, gotiod, i, nmp->nm_bufqiods);
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
			if (ISSET(bp->nb_flags, NB_IOD)) {
				/* An nfsiod is attempting this async operation so */
				/* we must not fall asleep on the bufq because we */
				/* could be waiting on ourself.  Just return error */
				/* and we'll do this operation synchronously. */
				goto out;
			}
			FSDBG(552, bp, nmp->nm_bufqlen, 2*nfs_numasync, -1);
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
				       "nfsaio", slptimeo);
			if (error) {
				error2 = nfs_sigintr(nmp, NULL, bp->nb_proc);
				if (error2) {
					FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, error2);
					return (error2);
				}
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0) {
				NFS_DPF(ASYNCIO,
					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
				goto again;
			}
		}

		if (ISSET(bp->nb_flags, NB_READ)) {
			if (bp->nb_rcred == NOCRED && cred != NOCRED) {
				/*
				 * NFS has embedded ucred.
				 * Can not crhold() here as that causes zone corruption
				 */
				bp->nb_rcred = crdup(cred);
			}
		} else {
			SET(bp->nb_flags, NB_WRITEINPROG);
			if (bp->nb_wcred == NOCRED && cred != NOCRED) {
				/*
				 * NFS has embedded ucred.
				 * Can not crhold() here as that causes zone corruption
				 */
				bp->nb_wcred = crdup(cred);
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, nb_free);
		nmp->nm_bufqlen++;

		FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, 0);
		return (0);
	}
out:
	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
	FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, EIO);
	return (EIO);
}
/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	struct nfsbuf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->nb_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;
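	/*
	 * All the RPCs below are driven through this single-segment,
	 * kernel-space uio/iovec pair, which is pointed at the relevant
	 * part of the buffer's data before each call.
	 */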
	/*
	 * we've decided to perform I/O for this block,
	 * so we couldn't possibly NB_DONE.  So, clear it.
	 */
	if (ISSET(bp->nb_flags, NB_DONE)) {
		if (!ISSET(bp->nb_flags, NB_ASYNC))
			panic("nfs_doio: done and not async");
		CLR(bp->nb_flags, NB_DONE);
	}
	FSDBG_TOP(256, np->n_size, NBOFF(bp), bp->nb_bufsize, bp->nb_flags);
	FSDBG(257, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff,
		bp->nb_dirtyend);
	if (ISSET(bp->nb_flags, NB_READ)) {
		if (vp->v_type == VREG)
			NFS_BUF_MAP(bp);
		io.iov_len = uiop->uio_resid = bp->nb_bufsize;
		io.iov_base = bp->nb_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = NBOFF(bp);
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			FSDBG(262, np->n_size, NBOFF(bp), uiop->uio_resid, error);
			/* update valid range */
			bp->nb_validoff = 0;
			if (uiop->uio_resid) {
				/*
				 * If len > 0, there is a hole in the file and
				 * no writes after the hole have been pushed to
				 * the server yet.
				 * Just zero fill the rest of the valid area.
				 */
				diff = bp->nb_bufsize - uiop->uio_resid;
				len = np->n_size - (NBOFF(bp) + diff);
				if (len > 0) {
					len = min(len, uiop->uio_resid);
					bzero((char *)bp->nb_data + diff, len);
					bp->nb_validend = diff + len;
					FSDBG(258, diff, len, 0, 1);
				} else
					bp->nb_validend = diff;
			} else
				bp->nb_validend = bp->nb_bufsize;
			bp->nb_valid = (1 << (round_page_32(bp->nb_validend)/PAGE_SIZE)) - 1;
			if (bp->nb_validend & PAGE_MASK) {
				/* valid range ends in the middle of a page so we */
				/* need to zero-fill any invalid data at the end */
				/* of the last page */
				bzero((caddr_t)(bp->nb_data + bp->nb_validend),
					bp->nb_bufsize - bp->nb_validend);
				FSDBG(258, bp->nb_validend,
					bp->nb_bufsize - bp->nb_validend, 0, 2);
			}
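			/*
			 * If this vnode backs a running executable and the file
			 * has changed on the server since it was mapped, the
			 * cached text can no longer be trusted, so the process
			 * gets killed below.
			 */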
			if (p && (vp->v_flag & VTEXT) &&
				(((nmp->nm_flag & NFSMNT_NQNFS) &&
				  NQNFS_CKINVALID(vp, np, ND_READ) &&
				  np->n_lrev != np->n_brev) ||
				 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
				  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_flag |= P_NOSWAP;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			if (!error) {
				bp->nb_validoff = 0;
				bp->nb_validend = uiop->uio_offset;
			}
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = NBOFF(bp);
			if (!(nmp->nm_flag & NFSMNT_NFSV3))
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;	/* dk@farm.org */
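			/*
			 * Prefer READDIRPLUS when the mount allows it; if the
			 * server doesn't support it, NFSERR_NOTSUPP turns it off
			 * for the whole mount and we fall back to plain READDIR
			 * below.
			 */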
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, cr);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, cr);
			if (!error) {
				bp->nb_validoff = 0;
				bp->nb_validend = uiop->uio_offset - NBOFF(bp);
				bp->nb_valid = (1 << (round_page_32(bp->nb_validend)/PAGE_SIZE)) - 1;
			}
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			SET(bp->nb_flags, NB_ERROR);
			bp->nb_error = error;
		}
	} else {
		/* we're doing a write */
		int doff, dend;

		/* We need to make sure the pages are locked before doing I/O. */
		if (!ISSET(bp->nb_flags, NB_META) && UBCISVALID(vp)) {
			if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
				error = nfs_buf_upl_setup(bp);
				if (error) {
					printf("nfs_doio: upl create failed %d\n", error);
					SET(bp->nb_flags, NB_ERROR);
				}
			}
			nfs_buf_upl_check(bp);
		}

		if (ISSET(bp->nb_flags, NB_WASDIRTY)) {
			FSDBG(256, bp, NBOFF(bp), bp->nb_dirty, 0xd00dee);
			/*
			 * There are pages marked dirty that need to be written out.
			 *
			 * We don't want to just combine the write range with the
			 * range of pages that are dirty because that could cause us
			 * to write data that wasn't actually written to.
			 * We also don't want to write data more than once.
			 *
			 * If the dirty range just needs to be committed, we do that.
			 * Otherwise, we write the dirty range and clear the dirty bits
			 * for any COMPLETE pages covered by that range.
			 * If there are dirty pages left after that, we write out the
			 * parts that we haven't written yet.
			 */
		}

		/*
		 * If NB_NEEDCOMMIT is set, a commit rpc may do the trick. If not
		 * an actual write will have to be done.
		 * If NB_WRITEINPROG is already set, then push it with a write anyhow.
		 */
		if ((bp->nb_flags & (NB_NEEDCOMMIT | NB_WRITEINPROG)) == NB_NEEDCOMMIT) {
			doff = NBOFF(bp) + bp->nb_dirtyoff;
			SET(bp->nb_flags, NB_WRITEINPROG);
			error = nfs_commit(vp, doff, bp->nb_dirtyend - bp->nb_dirtyoff,
					bp->nb_wcred, bp->nb_proc);
			CLR(bp->nb_flags, NB_WRITEINPROG);
			if (!error) {
				bp->nb_dirtyoff = bp->nb_dirtyend = 0;
				CLR(bp->nb_flags, NB_NEEDCOMMIT);
				np->n_needcommitcnt--;
				CHECK_NEEDCOMMITCNT(np);
			} else if (error == NFSERR_STALEWRITEVERF)
				nfs_clearcommit(vp->v_mount);
		}
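		/*
		 * (NFSERR_STALEWRITEVERF above means the server's write verifier
		 * changed, typically because it rebooted, so every uncommitted
		 * write on this mount must be redone; nfs_clearcommit() throws
		 * away the cached commit state for the whole mount.)
		 */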
		if (!error && bp->nb_dirtyend > 0) {
			/* there's a dirty range that needs to be written out */
			u_int32_t pagemask;
			int firstpg, lastpg;

			if (NBOFF(bp) + bp->nb_dirtyend > np->n_size)
				bp->nb_dirtyend = np->n_size - NBOFF(bp);
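			/*
			 * Compute the byte range [doff, dend) to send: start with
			 * the buffer's dirty range, then widen it to page
			 * boundaries wherever the neighboring pages are themselves
			 * entirely dirty, so this one write can also clear those
			 * pages' dirty bits.
			 */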
			doff = bp->nb_dirtyoff;
			dend = bp->nb_dirtyend;

			/* if doff page is dirty, move doff to start of page */
			if (NBPGDIRTY(bp, doff/PAGE_SIZE))
				doff -= doff & PAGE_MASK;
			/* try to expand write range to include preceding dirty pages */
			if (!(doff & PAGE_MASK))
				while (doff > 0 && NBPGDIRTY(bp, (doff-1)/PAGE_SIZE))
					doff -= PAGE_SIZE;
			/* if dend page is dirty, move dend to start of next page */
			if ((dend & PAGE_MASK) && NBPGDIRTY(bp, dend/PAGE_SIZE))
				dend = round_page_32(dend);
			/* try to expand write range to include trailing dirty pages */
			if (!(dend & PAGE_MASK))
				while (dend < bp->nb_bufsize && NBPGDIRTY(bp, dend/PAGE_SIZE))
					dend += PAGE_SIZE;
			/* make sure to keep dend clipped to EOF */
			if (NBOFF(bp) + dend > np->n_size)
				dend = np->n_size - NBOFF(bp);
			/* calculate range of complete pages being written */
			firstpg = round_page_32(doff) / PAGE_SIZE;
			lastpg = (trunc_page_32(dend) - 1) / PAGE_SIZE;
			/* calculate mask for that page range */
			pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1);
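			/*
			 * (Illustrative: with 4K pages, doff 0x1000 and dend 0x3000
			 * give firstpg 1 and lastpg 2, so pagemask is 0x6, covering
			 * exactly the two complete pages inside the write.)
			 */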
			/* compare page mask to nb_dirty; if there are other dirty pages */
			/* then write FILESYNC; otherwise, write UNSTABLE if async and */
			/* not needcommit/nocache/call; otherwise write FILESYNC */
			if (bp->nb_dirty & ~pagemask)
				iomode = NFSV3WRITE_FILESYNC;
			else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_NOCACHE | NB_STABLE)) == NB_ASYNC)
				iomode = NFSV3WRITE_UNSTABLE;
			else
				iomode = NFSV3WRITE_FILESYNC;
			/* write the dirty range */
			io.iov_len = uiop->uio_resid = dend - doff;
			uiop->uio_offset = NBOFF(bp) + doff;
			io.iov_base = (char *)bp->nb_data + doff;
			uiop->uio_rw = UIO_WRITE;

			nfsstats.write_bios++;

			SET(bp->nb_flags, NB_WRITEINPROG);
			error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
			if (must_commit)
				nfs_clearcommit(vp->v_mount);
			/* clear dirty bits for pages we've written */
			if (!error)
				bp->nb_dirty &= ~pagemask;
			/* set/clear needcommit flag */
			if (!error && iomode == NFSV3WRITE_UNSTABLE) {
				if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT))
					np->n_needcommitcnt++;
				SET(bp->nb_flags, NB_NEEDCOMMIT);
				/* make sure nb_dirtyoff/nb_dirtyend reflect actual range written */
				bp->nb_dirtyoff = doff;
				bp->nb_dirtyend = dend;
			} else {
				if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
					np->n_needcommitcnt--;
					CHECK_NEEDCOMMITCNT(np);
				}
				CLR(bp->nb_flags, NB_NEEDCOMMIT);
			}
			CLR(bp->nb_flags, NB_WRITEINPROG);
			/*
			 * For an interrupted write, the buffer is still valid and the write
			 * hasn't been pushed to the server yet, so we can't set NB_ERROR and
			 * report the interruption by setting NB_EINTR. For the NB_ASYNC case,
			 * NB_EINTR is not relevant.
			 *
			 * For the case of a V3 write rpc not being committed to stable
			 * storage, the block is still dirty and requires either a commit rpc
			 * or another write rpc with iomode == NFSV3WRITE_FILESYNC before the
			 * block is reused. This is indicated by setting the NB_DELWRI and
			 * NB_NEEDCOMMIT flags.
			 */
			if (error == EINTR || (!error && bp->nb_flags & NB_NEEDCOMMIT)) {
				CLR(bp->nb_flags, NB_INVAL | NB_NOCACHE);
				if (!ISSET(bp->nb_flags, NB_DELWRI)) {
					SET(bp->nb_flags, NB_DELWRI);
					nfs_nbdwrite++;
					NFSBUFCNTCHK();
				}
				FSDBG(261, bp->nb_validoff, bp->nb_validend,
					bp->nb_bufsize, 0);
				/*
				 * Since for the NB_ASYNC case, nfs_bwrite() has
				 * reassigned the buffer to the clean list, we have to
				 * reassign it back to the dirty one. Ugh.
				 */
				if (ISSET(bp->nb_flags, NB_ASYNC)) {
					/* move to dirty list */
					int s = splbio();
					if (bp->nb_vnbufs.le_next != NFSNOLIST)
						LIST_REMOVE(bp, nb_vnbufs);
					LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
					splx(s);
				} else {
					SET(bp->nb_flags, NB_EINTR);
				}
			} else {
				/* either there's an error or we don't need to commit */
				if (error) {
					SET(bp->nb_flags, NB_ERROR);
					bp->nb_error = np->n_error = error;
					np->n_flag |= NWRITEERR;
				}
				/* clear the dirty range */
				bp->nb_dirtyoff = bp->nb_dirtyend = 0;
			}
		}
		if (!error && bp->nb_dirty) {
			/* there are pages marked dirty that need to be written out */
			int pg, cnt, npages, off, len;

			nfsstats.write_bios++;
			/*
			 * we do these writes synchronously because we can't really
			 * support the unstable/needcommit method. We could write
			 * them unstable, clear the dirty bits, and then commit the
			 * whole block later, but if we need to rewrite the data, we
			 * won't have any idea which pages were written because that
			 * info can't be stored in the nb_dirtyoff/nb_dirtyend. We
			 * also can't leave the dirty bits set because then we wouldn't
			 * be able to tell if the pages were re-dirtied between the end
			 * of the write and the commit.
			 */
			iomode = NFSV3WRITE_FILESYNC;
			uiop->uio_rw = UIO_WRITE;

			SET(bp->nb_flags, NB_WRITEINPROG);
			npages = bp->nb_bufsize/PAGE_SIZE;
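			/*
			 * Walk the dirty-page bitmap and push each maximal run of
			 * contiguous dirty pages as a single FILESYNC write,
			 * clipped to the end of the file.
			 */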
			for (pg = 0; pg < npages; pg++) {
				if (!NBPGDIRTY(bp, pg))
					continue;
				cnt = 1;
				while (((pg+cnt) < npages) && NBPGDIRTY(bp, pg+cnt))
					cnt++;
				/* write cnt pages starting with page pg */
				off = pg * PAGE_SIZE;
				len = cnt * PAGE_SIZE;

				/* clip writes to EOF */
				if (NBOFF(bp) + off + len > np->n_size)
					len -= (NBOFF(bp) + off + len) - np->n_size;

				io.iov_len = uiop->uio_resid = len;
				uiop->uio_offset = NBOFF(bp) + off;
				io.iov_base = (char *)bp->nb_data + off;
				error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
				if (must_commit)
					nfs_clearcommit(vp->v_mount);
				if (error)
					break;

				/* clear dirty bits */
				while (cnt--) {
					bp->nb_dirty &= ~(1 << pg);
					/* leave pg on last page */
					if (cnt) pg++;
				}
			}
			if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
				np->n_needcommitcnt--;
				CHECK_NEEDCOMMITCNT(np);
			}
			CLR(bp->nb_flags, NB_NEEDCOMMIT);

			CLR(bp->nb_flags, NB_WRITEINPROG);
			FSDBG_BOT(256, bp->nb_validoff, bp->nb_validend, bp->nb_bufsize,
				np->n_size);
		}

		if (error) {
			SET(bp->nb_flags, NB_ERROR);
			bp->nb_error = error;
		}
	}
	FSDBG_BOT(256, bp->nb_validoff, bp->nb_validend, bp->nb_bufsize, error);