/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
 */
#include <nfs/nfs_conf.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kpi_mbuf.h>

#include <sys/vmparam.h>

#include <kern/clock.h>
#include <libkern/OSAtomic.h>
#include <kern/kalloc.h>
#include <kern/thread_call.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <sys/buf_internal.h>
#include <libkern/OSAtomic.h>
#include <os/refcnt.h>
#define NFS_BIO_DBG(...) NFS_DBG(NFS_FAC_BIO, 7, ## __VA_ARGS__)

kern_return_t thread_terminate(thread_t); /* XXX */

#define NFSBUFHASH(np, lbn)	\
	(&nfsbufhashtbl[((long)(np) / sizeof(*(np)) + (int)(lbn)) & nfsbufhash])
LIST_HEAD(nfsbufhashhead, nfsbuf) * nfsbufhashtbl;
u_long nfsbufhash;
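/*
 * Added note on NFSBUFHASH: the bucket index mixes the nfsnode pointer
 * (scaled down by the node size so distinct nodes spread across buckets)
 * with the logical block number, then masks with nfsbufhash, which
 * hashinit() fills in as a power-of-two bucket mask.  For example, blocks
 * 0 and 1 of the same nfsnode land in adjacent buckets.
 */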
struct nfsbuffreehead nfsbuffree, nfsbuffreemeta, nfsbufdelwri;

int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax;
int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer;

int nfs_buf_timer_on = 0;
thread_t nfsbufdelwrithd = NULL;

lck_grp_t *nfs_buf_lck_grp;
lck_mtx_t *nfs_buf_mutex;
#define NFSBUF_FREE_PERIOD	30	/* seconds */
#define NFSBUF_LRU_STALE	120
#define NFSBUF_META_STALE	240

/* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list */
#define LRU_TO_FREEUP	6
/* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list */
#define META_TO_FREEUP	3
/* total number of nfsbufs nfs_buf_freeup() should attempt to free */
#define TOTAL_TO_FREEUP	(LRU_TO_FREEUP+META_TO_FREEUP)
/* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list when called from timer */
#define LRU_FREEUP_FRAC_ON_TIMER	8
/* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list when called from timer */
#define META_FREEUP_FRAC_ON_TIMER	16
/* fraction of total nfsbufs that nfsbuffreecnt should exceed before bothering to call nfs_buf_freeup() */
#define LRU_FREEUP_MIN_FRAC	4
/* fraction of total nfsbufs that nfsbuffreemetacnt should exceed before bothering to call nfs_buf_freeup() */
#define META_FREEUP_MIN_FRAC	2
#define NFS_BUF_FREEUP() \
	do { \
	/* only call nfs_buf_freeup() if it has work to do: */ \
		if (((nfsbuffreecnt > nfsbufcnt/LRU_FREEUP_MIN_FRAC) || \
		    (nfsbuffreemetacnt > nfsbufcnt/META_FREEUP_MIN_FRAC)) && \
		    ((nfsbufcnt - TOTAL_TO_FREEUP) > nfsbufmin)) \
			nfs_buf_freeup(0); \
	} while (0)
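/*
 * Added illustrative numbers: with nfsbufcnt == 1024 and nfsbufmin well
 * below that, NFS_BUF_FREEUP() only bothers calling nfs_buf_freeup(0) once
 * more than 1024/4 == 256 buffers sit on nfsbuffree or more than
 * 1024/2 == 512 sit on nfsbuffreemeta, and only if freeing TOTAL_TO_FREEUP
 * (9) buffers would still leave the pool above nfsbufmin.
 */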
/*
 * Initialize nfsbuf lists
 */
void
nfs_nbinit(void)
{
	nfs_buf_lck_grp = lck_grp_alloc_init("nfs_buf", LCK_GRP_ATTR_NULL);
	nfs_buf_mutex = lck_mtx_alloc_init(nfs_buf_lck_grp, LCK_ATTR_NULL);

	nfsbufcnt = nfsbufmetacnt =
	    nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0;
	/* size nfsbufmax to cover at most half sane_size (w/default buf size) */
	nfsbufmax = (sane_size >> PAGE_SHIFT) / (2 * (NFS_RWSIZE >> PAGE_SHIFT));
	nfsbufmetamax = nfsbufmax / 4;

	nfsbufhashtbl = hashinit(nfsbufmax / 4, M_TEMP, &nfsbufhash);
	TAILQ_INIT(&nfsbuffree);
	TAILQ_INIT(&nfsbuffreemeta);
	TAILQ_INIT(&nfsbufdelwri);
}
/*
 * Check periodically for stale/unused nfs bufs
 */
void
nfs_buf_timer(__unused void *param0, __unused void *param1)
{
	nfs_buf_freeup(1);

	lck_mtx_lock(nfs_buf_mutex);
	if (nfsbufcnt <= nfsbufmin) {
		nfs_buf_timer_on = 0;
		lck_mtx_unlock(nfs_buf_mutex);
		return;
	}
	lck_mtx_unlock(nfs_buf_mutex);

	nfs_interval_timer_start(nfs_buf_timer_call,
	    NFSBUF_FREE_PERIOD * 1000);
}
/*
 * try to free up some excess, unused nfsbufs
 */
void
nfs_buf_freeup(int timer)
{
	struct nfsbuf *fbp;
	struct timeval now;
	int count;
	struct nfsbuffreehead nfsbuffreeup;

	TAILQ_INIT(&nfsbuffreeup);

	lck_mtx_lock(nfs_buf_mutex);

	microuptime(&now);

	FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);

	count = timer ? nfsbuffreecnt / LRU_FREEUP_FRAC_ON_TIMER : LRU_TO_FREEUP;
	while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
		fbp = TAILQ_FIRST(&nfsbuffree);
		if (!fbp) {
			break;
		}
		if (os_ref_get_count(&fbp->nb_refs) > 1) {
			break;
		}
		if (NBUFSTAMPVALID(fbp) &&
		    (fbp->nb_timestamp + (2 * NFSBUF_LRU_STALE)) > now.tv_sec) {
			break;
		}
		nfs_buf_remfree(fbp);
		/* disassociate buffer from any nfsnode */
		if (fbp->nb_vnbufs.le_next != NFSNOLIST) {
			LIST_REMOVE(fbp, nb_vnbufs);
			fbp->nb_vnbufs.le_next = NFSNOLIST;
		}
		LIST_REMOVE(fbp, nb_hash);
		TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free);
		nfsbufcnt--;
	}

	count = timer ? nfsbuffreemetacnt / META_FREEUP_FRAC_ON_TIMER : META_TO_FREEUP;
	while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
		fbp = TAILQ_FIRST(&nfsbuffreemeta);
		if (!fbp) {
			break;
		}
		if (os_ref_get_count(&fbp->nb_refs) > 1) {
			break;
		}
		if (NBUFSTAMPVALID(fbp) &&
		    (fbp->nb_timestamp + (2 * NFSBUF_META_STALE)) > now.tv_sec) {
			break;
		}
		nfs_buf_remfree(fbp);
		/* disassociate buffer from any nfsnode */
		if (fbp->nb_vnbufs.le_next != NFSNOLIST) {
			LIST_REMOVE(fbp, nb_vnbufs);
			fbp->nb_vnbufs.le_next = NFSNOLIST;
		}
		LIST_REMOVE(fbp, nb_hash);
		TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free);
		nfsbufcnt--;
		nfsbufmetacnt--;
	}

	FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);

	lck_mtx_unlock(nfs_buf_mutex);

	while ((fbp = TAILQ_FIRST(&nfsbuffreeup))) {
		TAILQ_REMOVE(&nfsbuffreeup, fbp, nb_free);
		if (IS_VALID_CRED(fbp->nb_rcred)) {
			kauth_cred_unref(&fbp->nb_rcred);
		}
		if (IS_VALID_CRED(fbp->nb_wcred)) {
			kauth_cred_unref(&fbp->nb_wcred);
		}
		/* if buf was NB_META, dump buffer */
		if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) {
			kfree(fbp->nb_data, fbp->nb_bufsize);
		}
		FREE(fbp, M_TEMP);
	}
}
/*
 * remove a buffer from the freelist
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_remfree(struct nfsbuf *bp)
{
	if (bp->nb_free.tqe_next == NFSNOLIST) {
		panic("nfsbuf not on free list");
	}
	if (ISSET(bp->nb_flags, NB_DELWRI)) {
		nfsbufdelwricnt--;
		TAILQ_REMOVE(&nfsbufdelwri, bp, nb_free);
	} else if (ISSET(bp->nb_flags, NB_META)) {
		nfsbuffreemetacnt--;
		TAILQ_REMOVE(&nfsbuffreemeta, bp, nb_free);
	} else {
		nfsbuffreecnt--;
		TAILQ_REMOVE(&nfsbuffree, bp, nb_free);
	}
	bp->nb_free.tqe_next = NFSNOLIST;
}
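/*
 * Added note: a buffer sits on at most one of the three free queues at a
 * time, and nb_free.tqe_next == NFSNOLIST is used throughout this file as
 * the "not on any free list" sentinel, which is why nfs_buf_remfree()
 * panics if it sees that value on entry.
 */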
/*
 * check for existence of nfsbuf in cache
 */
boolean_t
nfs_buf_is_incore(nfsnode_t np, daddr64_t blkno)
{
	boolean_t rv;

	lck_mtx_lock(nfs_buf_mutex);
	if (nfs_buf_incore(np, blkno)) {
		rv = TRUE;
	} else {
		rv = FALSE;
	}
	lck_mtx_unlock(nfs_buf_mutex);
	return rv;
}

/*
 * return incore buffer (must be called with nfs_buf_mutex held)
 */
struct nfsbuf *
nfs_buf_incore(nfsnode_t np, daddr64_t blkno)
{
	/* Search hash chain */
	struct nfsbuf * bp = NFSBUFHASH(np, blkno)->lh_first;
	for (; bp != NULL; bp = bp->nb_hash.le_next) {
		if ((bp->nb_lblkno == blkno) && (bp->nb_np == np)) {
			if (!ISSET(bp->nb_flags, NB_INVAL)) {
				FSDBG(547, bp, blkno, bp->nb_flags, bp->nb_np);
				return bp;
			}
		}
	}
	return NULL;
}
/*
 * Check if it's OK to drop a page.
 *
 * Called by vnode_pager() on pageout request of non-dirty page.
 * We need to make sure that it's not part of a delayed write.
 * If it is, we can't let the VM drop it because we may need it
 * later when/if we need to write the data (again).
 */
int
nfs_buf_page_inval(vnode_t vp, off_t offset)
{
	struct nfsmount *nmp = VTONMP(vp);
	struct nfsbuf *bp;
	int error = 0;

	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}

	lck_mtx_lock(nfs_buf_mutex);
	bp = nfs_buf_incore(VTONFS(vp), (daddr64_t)(offset / nmp->nm_biosize));
	if (!bp) {
		goto out;
	}
	FSDBG(325, bp, bp->nb_flags, bp->nb_dirtyoff, bp->nb_dirtyend);
	if (ISSET(bp->nb_lflags, NBL_BUSY)) {
		error = EBUSY;
		goto out;
	}
	/*
	 * If there's a dirty range in the buffer, check to
	 * see if this page intersects with the dirty range.
	 * If it does, we can't let the pager drop the page.
	 */
	if (bp->nb_dirtyend > 0) {
		int start = offset - NBOFF(bp);
		if ((bp->nb_dirtyend > start) &&
		    (bp->nb_dirtyoff < (start + PAGE_SIZE))) {
			/*
			 * Before returning the bad news, move the
			 * buffer to the start of the delwri list and
			 * give the list a push to try to flush the
			 * buffer out.
			 */
			error = EBUSY;
			nfs_buf_remfree(bp);
			TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
			nfsbufdelwricnt++;
			nfs_buf_delwri_push(1);
		}
	}
out:
	lck_mtx_unlock(nfs_buf_mutex);
	return error;
}
/*
 * set up the UPL for a buffer
 * (must NOT be called with nfs_buf_mutex held)
 */
int
nfs_buf_upl_setup(struct nfsbuf *bp)
{
	kern_return_t kret;
	upl_t upl;
	int upl_flags;

	if (ISSET(bp->nb_flags, NB_PAGELIST)) {
		return 0;
	}

	upl_flags = UPL_PRECIOUS;
	if (!ISSET(bp->nb_flags, NB_READ)) {
		/*
		 * We're doing a "write", so we intend to modify
		 * the pages we're gathering.
		 */
		upl_flags |= UPL_WILL_MODIFY;
	}
	kret = ubc_create_upl_kernel(NFSTOV(bp->nb_np), NBOFF(bp), bp->nb_bufsize,
	    &upl, NULL, upl_flags, VM_KERN_MEMORY_FILE);
	if (kret == KERN_INVALID_ARGUMENT) {
		/* vm object probably doesn't exist any more */
		bp->nb_pagelist = NULL;
		return EINVAL;
	}
	if (kret != KERN_SUCCESS) {
		printf("nfs_buf_upl_setup(): failed to get pagelist %d\n", kret);
		bp->nb_pagelist = NULL;
		return EIO;
	}

	FSDBG(538, bp, NBOFF(bp), bp->nb_bufsize, bp->nb_np);

	bp->nb_pagelist = upl;
	SET(bp->nb_flags, NB_PAGELIST);
	return 0;
}
/*
 * update buffer's valid/dirty info from UBC
 * (must NOT be called with nfs_buf_mutex held)
 */
void
nfs_buf_upl_check(struct nfsbuf *bp)
{
	upl_page_info_t *pl;
	off_t filesize, fileoffset;
	int i, npages;

	if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
		return;
	}

	npages = round_page_32(bp->nb_bufsize) / PAGE_SIZE;
	filesize = ubc_getsize(NFSTOV(bp->nb_np));
	fileoffset = NBOFF(bp);
	if (fileoffset < filesize) {
		SET(bp->nb_flags, NB_CACHE);
	} else {
		CLR(bp->nb_flags, NB_CACHE);
	}

	pl = ubc_upl_pageinfo(bp->nb_pagelist);
	bp->nb_valid = bp->nb_dirty = 0;

	for (i = 0; i < npages; i++, fileoffset += PAGE_SIZE_64) {
		/* anything beyond the end of the file is not valid or dirty */
		if (fileoffset >= filesize) {
			break;
		}
		if (!upl_valid_page(pl, i)) {
			CLR(bp->nb_flags, NB_CACHE);
			continue;
		}
		NBPGVALID_SET(bp, i);
		if (upl_dirty_page(pl, i)) {
			NBPGDIRTY_SET(bp, i);
		}
	}
	fileoffset = NBOFF(bp);
	if (ISSET(bp->nb_flags, NB_CACHE)) {
		bp->nb_validoff = 0;
		bp->nb_validend = bp->nb_bufsize;
		if (fileoffset + bp->nb_validend > filesize) {
			bp->nb_validend = filesize - fileoffset;
		}
	} else {
		bp->nb_validoff = bp->nb_validend = -1;
	}
	FSDBG(539, bp, fileoffset, bp->nb_valid, bp->nb_dirty);
	FSDBG(539, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff, bp->nb_dirtyend);
}
/*
 * make sure that a buffer is mapped
 * (must NOT be called with nfs_buf_mutex held)
 */
int
nfs_buf_map(struct nfsbuf *bp)
{
	kern_return_t kret;

	if (bp->nb_data) {
		return 0;
	}
	if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
		return EINVAL;
	}

	kret = ubc_upl_map(bp->nb_pagelist, (vm_offset_t *)&(bp->nb_data));
	if (kret != KERN_SUCCESS) {
		panic("nfs_buf_map: ubc_upl_map() failed with (%d)", kret);
	}
	if (bp->nb_data == 0) {
		panic("ubc_upl_map mapped 0");
	}
	FSDBG(540, bp, bp->nb_flags, NBOFF(bp), bp->nb_data);
	return 0;
}
/*
 * normalize an nfsbuf's valid range
 *
 * the read/write code guarantees that we'll always have a valid
 * region that is an integral number of pages.  If either end
 * of the valid range isn't page-aligned, it gets corrected
 * here as we extend the valid range through all of the
 * contiguous valid pages.
 */
void
nfs_buf_normalize_valid_range(nfsnode_t np, struct nfsbuf *bp)
{
	int pg, npg;

	/* pull validoff back to start of contiguous valid page range */
	pg = bp->nb_validoff / PAGE_SIZE;
	while (pg >= 0 && NBPGVALID(bp, pg)) {
		pg--;
	}
	bp->nb_validoff = (pg + 1) * PAGE_SIZE;
	/* push validend forward to end of contiguous valid page range */
	npg = bp->nb_bufsize / PAGE_SIZE;
	pg = bp->nb_validend / PAGE_SIZE;
	while (pg < npg && NBPGVALID(bp, pg)) {
		pg++;
	}
	bp->nb_validend = pg * PAGE_SIZE;
	if (NBOFF(bp) + bp->nb_validend > (off_t)np->n_size) {
		bp->nb_validend = np->n_size % bp->nb_bufsize;
	}
}
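/*
 * Added example for clarity: with 4KB pages, if nb_validoff == 0x1200 and
 * the pages covering 0x0000-0x2FFF are all marked valid in nb_valid, the
 * first loop walks validoff back to 0x0000; likewise a validend of 0x2200
 * is pushed forward to 0x3000, so the resulting range covers whole pages
 * only, then gets clipped to the file size.
 */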
/*
 * process some entries on the delayed write queue
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_delwri_service(void)
{
	struct nfsbuf *bp;
	nfsnode_t np;
	int error, i = 0;

	while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) {
		np = bp->nb_np;
		nfs_buf_remfree(bp);
		nfs_buf_refget(bp);
		while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN) {
			;
		}
		nfs_buf_refrele(bp);
		if (error) {
			break;
		}
		if (!bp->nb_np) {
			/* buffer is no longer valid */
			nfs_buf_drop(bp);
			continue;
		}
		if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
			nfs_buf_check_write_verifier(np, bp);
		}
		if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
			/* put buffer at end of delwri list */
			TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
			nfsbufdelwricnt++;
			nfs_buf_drop(bp);
			lck_mtx_unlock(nfs_buf_mutex);
			nfs_flushcommits(np, 1);
		} else {
			SET(bp->nb_flags, NB_ASYNC);
			lck_mtx_unlock(nfs_buf_mutex);
			nfs_buf_write(bp);
		}
		i++;
		lck_mtx_lock(nfs_buf_mutex);
	}
}
/*
 * thread to service the delayed write queue when asked
 */
void
nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr)
{
	struct timespec ts = { .tv_sec = 30, .tv_nsec = 0 };
	int error = 0;

	lck_mtx_lock(nfs_buf_mutex);
	while (!error) {
		nfs_buf_delwri_service();
		error = msleep(&nfsbufdelwrithd, nfs_buf_mutex, 0, "nfsbufdelwri", &ts);
	}
	nfsbufdelwrithd = NULL;
	lck_mtx_unlock(nfs_buf_mutex);
	thread_terminate(nfsbufdelwrithd);
}
/*
 * try to push out some delayed/uncommitted writes
 * ("locked" indicates whether nfs_buf_mutex is already held)
 */
void
nfs_buf_delwri_push(int locked)
{
	if (TAILQ_EMPTY(&nfsbufdelwri)) {
		return;
	}
	if (!locked) {
		lck_mtx_lock(nfs_buf_mutex);
	}
	/* wake up the delayed write service thread */
	if (nfsbufdelwrithd) {
		wakeup(&nfsbufdelwrithd);
	} else if (kernel_thread_start(nfs_buf_delwri_thread, NULL, &nfsbufdelwrithd) == KERN_SUCCESS) {
		thread_deallocate(nfsbufdelwrithd);
	}
	/* otherwise, try to do some of the work ourselves */
	if (!nfsbufdelwrithd) {
		nfs_buf_delwri_service();
	}
	if (!locked) {
		lck_mtx_unlock(nfs_buf_mutex);
	}
}
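/*
 * Added note: the wakeup()/kernel_thread_start() pair above is a
 * start-on-demand pattern -- the delwri service thread is only created the
 * first time there is delayed-write work to push, and the immediate
 * thread_deallocate() drops the extra thread reference that
 * kernel_thread_start() hands back; the thread itself keeps servicing the
 * queue until its 30-second msleep() in nfs_buf_delwri_thread() times out.
 */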
661 * Returns errno on error, 0 otherwise.
662 * Any buffer is returned in *bpp.
664 * If NBLK_ONLYVALID is set, only return buffer if found in cache.
665 * If NBLK_NOWAIT is set, don't wait for the buffer if it's marked BUSY.
667 * Check for existence of buffer in cache.
668 * Or attempt to reuse a buffer from one of the free lists.
669 * Or allocate a new buffer if we haven't already hit max allocation.
670 * Or wait for a free buffer.
672 * If available buffer found, prepare it, and return it.
674 * If the calling process is interrupted by a signal for
675 * an interruptible mount point, return EINTR.
686 vnode_t vp
= NFSTOV(np
);
687 struct nfsmount
*nmp
= VTONMP(vp
);
690 int slpflag
= PCATCH
;
691 int operation
= (flags
& NBLK_OPMASK
);
695 FSDBG_TOP(541, np
, blkno
, size
, flags
);
699 if (bufsize
> NFS_MAXBSIZE
) {
700 panic("nfs_buf_get: buffer larger than NFS_MAXBSIZE requested");
703 if (nfs_mount_gone(nmp
)) {
704 FSDBG_BOT(541, np
, blkno
, 0, ENXIO
);
708 if (!UBCINFOEXISTS(vp
)) {
709 operation
= NBLK_META
;
710 } else if (bufsize
< (uint32_t)nmp
->nm_biosize
) {
711 /* reg files should always have biosize blocks */
712 bufsize
= nmp
->nm_biosize
;
715 /* if NBLK_WRITE, check for too many delayed/uncommitted writes */
716 if ((operation
== NBLK_WRITE
) && (nfs_nbdwrite
> NFS_A_LOT_OF_DELAYED_WRITES
)) {
717 FSDBG_TOP(542, np
, blkno
, nfs_nbdwrite
, NFS_A_LOT_OF_DELAYED_WRITES
);
719 /* poke the delwri list */
720 nfs_buf_delwri_push(0);
722 /* sleep to let other threads run... */
723 tsleep(&nfs_nbdwrite
, PCATCH
, "nfs_nbdwrite", 1);
724 FSDBG_BOT(542, np
, blkno
, nfs_nbdwrite
, NFS_A_LOT_OF_DELAYED_WRITES
);
728 lck_mtx_lock(nfs_buf_mutex
);
730 /* wait for any buffer invalidation/flushing to complete */
731 while (np
->n_bflag
& NBINVALINPROG
) {
732 np
->n_bflag
|= NBINVALWANT
;
735 msleep(&np
->n_bflag
, nfs_buf_mutex
, slpflag
, "nfs_buf_get_invalwait", &ts
);
736 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
737 lck_mtx_unlock(nfs_buf_mutex
);
738 FSDBG_BOT(541, np
, blkno
, 0, error
);
741 if (np
->n_bflag
& NBINVALINPROG
) {
746 /* check for existence of nfsbuf in cache */
747 if ((bp
= nfs_buf_incore(np
, blkno
))) {
748 /* if busy, set wanted and wait */
749 if (ISSET(bp
->nb_lflags
, NBL_BUSY
)) {
750 if (flags
& NBLK_NOWAIT
) {
751 lck_mtx_unlock(nfs_buf_mutex
);
752 FSDBG_BOT(541, np
, blkno
, bp
, 0xbcbcbcbc);
755 FSDBG_TOP(543, np
, blkno
, bp
, bp
->nb_flags
);
756 SET(bp
->nb_lflags
, NBL_WANTED
);
760 msleep(bp
, nfs_buf_mutex
, slpflag
| (PRIBIO
+ 1) | PDROP
,
761 "nfsbufget", (slpflag
== PCATCH
) ? NULL
: &ts
);
763 FSDBG_BOT(543, np
, blkno
, bp
, bp
->nb_flags
);
764 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
765 FSDBG_BOT(541, np
, blkno
, 0, error
);
770 if (bp
->nb_bufsize
!= bufsize
) {
771 panic("nfsbuf size mismatch");
773 SET(bp
->nb_lflags
, NBL_BUSY
);
774 SET(bp
->nb_flags
, NB_CACHE
);
776 /* additional paranoia: */
777 if (ISSET(bp
->nb_flags
, NB_PAGELIST
)) {
778 panic("pagelist buffer was not busy");
783 if (flags
& NBLK_ONLYVALID
) {
784 lck_mtx_unlock(nfs_buf_mutex
);
785 FSDBG_BOT(541, np
, blkno
, 0, 0x0000cace);
790 * where to get a free buffer:
791 * - if meta and maxmeta reached, must reuse meta
792 * - alloc new if we haven't reached min bufs
793 * - if free lists are NOT empty
794 * - if free list is stale, use it
795 * - else if freemeta list is stale, use it
796 * - else if max bufs allocated, use least-time-to-stale
797 * - alloc new if we haven't reached max allowed
798 * - start clearing out delwri list and try again
801 if ((operation
== NBLK_META
) && (nfsbufmetacnt
>= nfsbufmetamax
)) {
802 /* if we've hit max meta buffers, must reuse a meta buffer */
803 bp
= TAILQ_FIRST(&nfsbuffreemeta
);
804 } else if ((nfsbufcnt
> nfsbufmin
) &&
805 (!TAILQ_EMPTY(&nfsbuffree
) || !TAILQ_EMPTY(&nfsbuffreemeta
))) {
806 /* try to pull an nfsbuf off a free list */
807 struct nfsbuf
*lrubp
, *metabp
;
811 /* if the next LRU or META buffer is invalid or stale, use it */
812 lrubp
= TAILQ_FIRST(&nfsbuffree
);
813 if (lrubp
&& (!NBUFSTAMPVALID(lrubp
) ||
814 ((lrubp
->nb_timestamp
+ NFSBUF_LRU_STALE
) < now
.tv_sec
))) {
817 metabp
= TAILQ_FIRST(&nfsbuffreemeta
);
818 if (!bp
&& metabp
&& (!NBUFSTAMPVALID(metabp
) ||
819 ((metabp
->nb_timestamp
+ NFSBUF_META_STALE
) < now
.tv_sec
))) {
823 if (!bp
&& (nfsbufcnt
>= nfsbufmax
)) {
824 /* we've already allocated all bufs, so */
825 /* choose the buffer that'll go stale first */
831 int32_t lru_stale_time
, meta_stale_time
;
832 lru_stale_time
= lrubp
->nb_timestamp
+ NFSBUF_LRU_STALE
;
833 meta_stale_time
= metabp
->nb_timestamp
+ NFSBUF_META_STALE
;
834 if (lru_stale_time
<= meta_stale_time
) {
844 /* we have a buffer to reuse */
845 FSDBG(544, np
, blkno
, bp
, bp
->nb_flags
);
847 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
848 panic("nfs_buf_get: delwri");
850 SET(bp
->nb_lflags
, NBL_BUSY
);
851 /* disassociate buffer from previous nfsnode */
853 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
854 LIST_REMOVE(bp
, nb_vnbufs
);
855 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
859 LIST_REMOVE(bp
, nb_hash
);
860 /* nuke any creds we're holding */
861 if (IS_VALID_CRED(bp
->nb_rcred
)) {
862 kauth_cred_unref(&bp
->nb_rcred
);
864 if (IS_VALID_CRED(bp
->nb_wcred
)) {
865 kauth_cred_unref(&bp
->nb_wcred
);
867 /* if buf will no longer be NB_META, dump old buffer */
868 if (operation
== NBLK_META
) {
869 if (!ISSET(bp
->nb_flags
, NB_META
)) {
872 } else if (ISSET(bp
->nb_flags
, NB_META
)) {
874 kfree(bp
->nb_data
, bp
->nb_bufsize
);
879 /* re-init buf fields */
881 bp
->nb_validoff
= bp
->nb_validend
= -1;
882 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
887 /* no buffer to reuse */
888 if ((nfsbufcnt
< nfsbufmax
) &&
889 ((operation
!= NBLK_META
) || (nfsbufmetacnt
< nfsbufmetamax
))) {
890 /* just alloc a new one */
891 MALLOC(bp
, struct nfsbuf
*, sizeof(struct nfsbuf
), M_TEMP
, M_WAITOK
);
893 lck_mtx_unlock(nfs_buf_mutex
);
894 FSDBG_BOT(541, np
, blkno
, 0, error
);
900 * If any excess bufs, make sure the timer
901 * is running to free them up later.
903 if (nfsbufcnt
> nfsbufmin
&& !nfs_buf_timer_on
) {
904 nfs_buf_timer_on
= 1;
905 nfs_interval_timer_start(nfs_buf_timer_call
,
906 NFSBUF_FREE_PERIOD
* 1000);
909 if (operation
== NBLK_META
) {
914 bzero(bp
, sizeof(*bp
));
915 os_ref_init(&bp
->nb_refs
, NULL
);
917 bp
->nb_free
.tqe_next
= NFSNOLIST
;
918 bp
->nb_validoff
= bp
->nb_validend
= -1;
919 FSDBG(545, np
, blkno
, bp
, 0);
921 /* too many bufs... wait for buffers to free up */
922 FSDBG_TOP(546, np
, blkno
, nfsbufcnt
, nfsbufmax
);
924 /* poke the delwri list */
925 nfs_buf_delwri_push(1);
928 msleep(&nfsneedbuffer
, nfs_buf_mutex
, PCATCH
| PDROP
, "nfsbufget", NULL
);
929 FSDBG_BOT(546, np
, blkno
, nfsbufcnt
, nfsbufmax
);
930 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
931 FSDBG_BOT(541, np
, blkno
, 0, error
);
939 SET(bp
->nb_lflags
, NBL_BUSY
);
941 bp
->nb_lblkno
= blkno
;
942 /* insert buf in hash */
943 LIST_INSERT_HEAD(NFSBUFHASH(np
, blkno
), bp
, nb_hash
);
944 /* associate buffer with new nfsnode */
946 LIST_INSERT_HEAD(&np
->n_cleanblkhd
, bp
, nb_vnbufs
);
951 lck_mtx_unlock(nfs_buf_mutex
);
955 SET(bp
->nb_flags
, NB_META
);
956 if ((bp
->nb_bufsize
!= bufsize
) && bp
->nb_data
) {
957 kfree(bp
->nb_data
, bp
->nb_bufsize
);
959 bp
->nb_validoff
= bp
->nb_validend
= -1;
960 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
963 CLR(bp
->nb_flags
, NB_CACHE
);
966 bp
->nb_data
= kalloc(bufsize
);
969 /* Ack! couldn't allocate the data buffer! */
970 /* clean up buffer and return error */
971 lck_mtx_lock(nfs_buf_mutex
);
972 LIST_REMOVE(bp
, nb_vnbufs
);
973 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
975 /* invalidate usage timestamp to allow immediate freeing */
976 NBUFSTAMPINVALIDATE(bp
);
977 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
978 panic("nfsbuf on freelist");
980 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
982 lck_mtx_unlock(nfs_buf_mutex
);
983 FSDBG_BOT(541, np
, blkno
, 0xb00, ENOMEM
);
986 bp
->nb_bufsize
= bufsize
;
992 * Set or clear NB_READ now to let the UPL subsystem know
993 * if we intend to modify the pages or not.
995 if (operation
== NBLK_READ
) {
996 SET(bp
->nb_flags
, NB_READ
);
998 CLR(bp
->nb_flags
, NB_READ
);
1000 if (bufsize
< PAGE_SIZE
) {
1001 bufsize
= PAGE_SIZE
;
1003 bp
->nb_bufsize
= bufsize
;
1004 bp
->nb_validoff
= bp
->nb_validend
= -1;
1006 if (UBCINFOEXISTS(vp
)) {
1008 if (nfs_buf_upl_setup(bp
)) {
1009 /* unable to create upl */
1010 /* vm object must no longer exist */
1011 /* clean up buffer and return error */
1012 lck_mtx_lock(nfs_buf_mutex
);
1013 LIST_REMOVE(bp
, nb_vnbufs
);
1014 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
1016 /* invalidate usage timestamp to allow immediate freeing */
1017 NBUFSTAMPINVALIDATE(bp
);
1018 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1019 panic("nfsbuf on freelist");
1021 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
1023 lck_mtx_unlock(nfs_buf_mutex
);
1024 FSDBG_BOT(541, np
, blkno
, 0x2bc, EIO
);
1027 nfs_buf_upl_check(bp
);
1032 panic("nfs_buf_get: %d unknown operation", operation
);
1037 FSDBG_BOT(541, np
, blkno
, bp
, bp
->nb_flags
);
1043 nfs_buf_release(struct nfsbuf
*bp
, int freeup
)
1045 nfsnode_t np
= bp
->nb_np
;
1048 int wakeup_needbuffer
, wakeup_buffer
, wakeup_nbdwrite
;
1050 FSDBG_TOP(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
1051 FSDBG(548, bp
->nb_validoff
, bp
->nb_validend
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
);
1052 FSDBG(548, bp
->nb_valid
, 0, bp
->nb_dirty
, 0);
1054 vp
= np
? NFSTOV(np
) : NULL
;
1055 if (vp
&& UBCINFOEXISTS(vp
) && bp
->nb_bufsize
) {
1060 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
) && !ISSET(bp
->nb_flags
, NB_INVAL
)) {
1061 rv
= nfs_buf_upl_setup(bp
);
1063 printf("nfs_buf_release: upl create failed %d\n", rv
);
1065 nfs_buf_upl_check(bp
);
1068 upl
= bp
->nb_pagelist
;
1070 goto pagelist_cleanup_done
;
1073 if (ubc_upl_unmap(upl
) != KERN_SUCCESS
) {
1074 panic("ubc_upl_unmap failed");
1079 * Abort the pages on error or: if this is an invalid or
1080 * non-needcommit nocache buffer AND no pages are dirty.
1082 if (ISSET(bp
->nb_flags
, NB_ERROR
) || (!bp
->nb_dirty
&& (ISSET(bp
->nb_flags
, NB_INVAL
) ||
1083 (ISSET(bp
->nb_flags
, NB_NOCACHE
) && !ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
)))))) {
1084 if (ISSET(bp
->nb_flags
, (NB_READ
| NB_INVAL
| NB_NOCACHE
))) {
1085 upl_flags
= UPL_ABORT_DUMP_PAGES
;
1089 ubc_upl_abort(upl
, upl_flags
);
1090 goto pagelist_cleanup_done
;
1092 for (i
= 0; i
<= (bp
->nb_bufsize
- 1) / PAGE_SIZE
; i
++) {
1093 if (!NBPGVALID(bp
, i
)) {
1094 ubc_upl_abort_range(upl
,
1095 i
* PAGE_SIZE
, PAGE_SIZE
,
1096 UPL_ABORT_DUMP_PAGES
|
1097 UPL_ABORT_FREE_ON_EMPTY
);
1099 if (NBPGDIRTY(bp
, i
)) {
1100 upl_flags
= UPL_COMMIT_SET_DIRTY
;
1102 upl_flags
= UPL_COMMIT_CLEAR_DIRTY
;
1105 if (!ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
))) {
1106 upl_flags
|= UPL_COMMIT_CLEAR_PRECIOUS
;
1109 ubc_upl_commit_range(upl
,
1110 i
* PAGE_SIZE
, PAGE_SIZE
,
1112 UPL_COMMIT_INACTIVATE
|
1113 UPL_COMMIT_FREE_ON_EMPTY
);
1116 pagelist_cleanup_done
:
1117 /* invalidate any pages past EOF */
1118 if (NBOFF(bp
) + bp
->nb_bufsize
> (off_t
)(np
->n_size
)) {
1120 start
= trunc_page_64(np
->n_size
) + PAGE_SIZE_64
;
1121 end
= trunc_page_64(NBOFF(bp
) + bp
->nb_bufsize
);
1122 if (start
< NBOFF(bp
)) {
1126 if ((rv
= ubc_msync(vp
, start
, end
, NULL
, UBC_INVALIDATE
))) {
1127 printf("nfs_buf_release(): ubc_msync failed!, error %d\n", rv
);
1131 CLR(bp
->nb_flags
, NB_PAGELIST
);
1132 bp
->nb_pagelist
= NULL
;
1135 lck_mtx_lock(nfs_buf_mutex
);
1137 wakeup_needbuffer
= wakeup_buffer
= wakeup_nbdwrite
= 0;
1139 /* Wake up any processes waiting for any buffer to become free. */
1140 if (nfsneedbuffer
) {
1142 wakeup_needbuffer
= 1;
1144 /* Wake up any processes waiting for _this_ buffer to become free. */
1145 if (ISSET(bp
->nb_lflags
, NBL_WANTED
)) {
1146 CLR(bp
->nb_lflags
, NBL_WANTED
);
1150 /* If it's non-needcommit nocache, or an error, mark it invalid. */
1151 if (ISSET(bp
->nb_flags
, NB_ERROR
) ||
1152 (ISSET(bp
->nb_flags
, NB_NOCACHE
) && !ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
)))) {
1153 SET(bp
->nb_flags
, NB_INVAL
);
1156 if ((bp
->nb_bufsize
<= 0) || ISSET(bp
->nb_flags
, NB_INVAL
)) {
1157 /* If it's invalid or empty, dissociate it from its nfsnode */
1158 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
1159 LIST_REMOVE(bp
, nb_vnbufs
);
1160 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
1163 /* if this was a delayed write, wakeup anyone */
1164 /* waiting for delayed writes to complete */
1165 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1166 CLR(bp
->nb_flags
, NB_DELWRI
);
1169 wakeup_nbdwrite
= 1;
1171 /* invalidate usage timestamp to allow immediate freeing */
1172 NBUFSTAMPINVALIDATE(bp
);
1173 /* put buffer at head of free list */
1174 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1175 panic("nfsbuf on freelist");
1177 SET(bp
->nb_flags
, NB_INVAL
);
1178 if (ISSET(bp
->nb_flags
, NB_META
)) {
1179 TAILQ_INSERT_HEAD(&nfsbuffreemeta
, bp
, nb_free
);
1180 nfsbuffreemetacnt
++;
1182 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
1185 } else if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1186 /* put buffer at end of delwri list */
1187 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1188 panic("nfsbuf on freelist");
1190 TAILQ_INSERT_TAIL(&nfsbufdelwri
, bp
, nb_free
);
1194 /* update usage timestamp */
1196 bp
->nb_timestamp
= now
.tv_sec
;
1197 /* put buffer at end of free list */
1198 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1199 panic("nfsbuf on freelist");
1201 if (ISSET(bp
->nb_flags
, NB_META
)) {
1202 TAILQ_INSERT_TAIL(&nfsbuffreemeta
, bp
, nb_free
);
1203 nfsbuffreemetacnt
++;
1205 TAILQ_INSERT_TAIL(&nfsbuffree
, bp
, nb_free
);
1212 /* Unlock the buffer. */
1213 CLR(bp
->nb_flags
, (NB_ASYNC
| NB_STABLE
));
1214 CLR(bp
->nb_lflags
, NBL_BUSY
);
1216 FSDBG_BOT(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
1218 lck_mtx_unlock(nfs_buf_mutex
);
1220 if (wakeup_needbuffer
) {
1221 wakeup(&nfsneedbuffer
);
1223 if (wakeup_buffer
) {
1226 if (wakeup_nbdwrite
) {
1227 wakeup(&nfs_nbdwrite
);
/*
 * Wait for operations on the buffer to complete.
 * When they do, extract and return the I/O's error value.
 */
int
nfs_buf_iowait(struct nfsbuf *bp)
{
	FSDBG_TOP(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	lck_mtx_lock(nfs_buf_mutex);

	while (!ISSET(bp->nb_flags, NB_DONE)) {
		msleep(bp, nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", NULL);
	}

	lck_mtx_unlock(nfs_buf_mutex);

	FSDBG_BOT(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	/* check for interruption of I/O, then errors. */
	if (ISSET(bp->nb_flags, NB_EINTR)) {
		CLR(bp->nb_flags, NB_EINTR);
		return EINTR;
	} else if (ISSET(bp->nb_flags, NB_ERROR)) {
		return bp->nb_error ? bp->nb_error : EIO;
	}
	return 0;
}
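/*
 * Added note: nfs_buf_iowait() pairs with nfs_buf_iodone() below -- iodone
 * sets NB_DONE (and either releases an async buffer or wakes sleepers),
 * while iowait sleeps on the buffer address until NB_DONE appears and then
 * translates the NB_EINTR/NB_ERROR flags into an errno for the caller.
 */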
/*
 * Mark I/O complete on a buffer.
 */
void
nfs_buf_iodone(struct nfsbuf *bp)
{
	FSDBG_TOP(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	if (ISSET(bp->nb_flags, NB_DONE)) {
		panic("nfs_buf_iodone already");
	}

	if (!ISSET(bp->nb_flags, NB_READ)) {
		CLR(bp->nb_flags, NB_WRITEINPROG);
		/*
		 * vnode_writedone() takes care of waking up
		 * any throttled write operations
		 */
		vnode_writedone(NFSTOV(bp->nb_np));
		nfs_node_lock_force(bp->nb_np);
		bp->nb_np->n_numoutput--;
		nfs_node_unlock(bp->nb_np);
	}
	if (ISSET(bp->nb_flags, NB_ASYNC)) {    /* if async, release it */
		SET(bp->nb_flags, NB_DONE);     /* note that it's done */
		nfs_buf_release(bp, 1);
	} else {                                /* or just wakeup the buffer */
		lck_mtx_lock(nfs_buf_mutex);
		SET(bp->nb_flags, NB_DONE);     /* note that it's done */
		CLR(bp->nb_lflags, NBL_WANTED);
		lck_mtx_unlock(nfs_buf_mutex);
		wakeup(bp);
	}

	FSDBG_BOT(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);
}
1301 nfs_buf_write_delayed(struct nfsbuf
*bp
)
1303 nfsnode_t np
= bp
->nb_np
;
1305 FSDBG_TOP(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
1306 FSDBG(551, bp
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
, bp
->nb_dirty
);
1309 * If the block hasn't been seen before:
1310 * (1) Mark it as having been seen,
1311 * (2) Make sure it's on its node's correct block list,
1313 if (!ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1314 SET(bp
->nb_flags
, NB_DELWRI
);
1315 /* move to dirty list */
1316 lck_mtx_lock(nfs_buf_mutex
);
1319 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
1320 LIST_REMOVE(bp
, nb_vnbufs
);
1322 LIST_INSERT_HEAD(&np
->n_dirtyblkhd
, bp
, nb_vnbufs
);
1323 lck_mtx_unlock(nfs_buf_mutex
);
1327 * If the vnode has "too many" write operations in progress
1328 * wait for them to finish the IO
1330 vnode_waitforwrites(NFSTOV(np
), VNODE_ASYNC_THROTTLE
, 0, 0, "nfs_buf_write_delayed");
1332 /* the file is in a modified state, so make sure the flag's set */
1333 nfs_node_lock_force(np
);
1334 np
->n_flag
|= NMODIFIED
;
1335 nfs_node_unlock(np
);
1338 * If we have too many delayed write buffers,
1339 * just fall back to doing the async write.
1341 if (nfs_nbdwrite
< 0) {
1342 panic("nfs_buf_write_delayed: Negative nfs_nbdwrite");
1344 if (nfs_nbdwrite
> NFS_A_LOT_OF_DELAYED_WRITES
) {
1345 /* issue async write */
1346 SET(bp
->nb_flags
, NB_ASYNC
);
1348 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
1352 /* Otherwise, the "write" is done, so mark and release the buffer. */
1353 SET(bp
->nb_flags
, NB_DONE
);
1354 nfs_buf_release(bp
, 1);
1355 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
1360 * Check that a "needcommit" buffer can still be committed.
1361 * If the write verifier has changed, we need to clear the
1362 * the needcommit flag.
1365 nfs_buf_check_write_verifier(nfsnode_t np
, struct nfsbuf
*bp
)
1367 struct nfsmount
*nmp
;
1369 if (!ISSET(bp
->nb_flags
, NB_NEEDCOMMIT
)) {
1374 if (nfs_mount_gone(nmp
)) {
1377 if (!ISSET(bp
->nb_flags
, NB_STALEWVERF
) && (bp
->nb_verf
== nmp
->nm_verf
)) {
1381 /* write verifier changed, clear commit/wverf flags */
1382 CLR(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_STALEWVERF
));
1384 nfs_node_lock_force(np
);
1385 np
->n_needcommitcnt
--;
1386 CHECK_NEEDCOMMITCNT(np
);
1387 nfs_node_unlock(np
);
/*
 * add a reference to a buffer so it doesn't disappear while being used
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_refget(struct nfsbuf *bp)
{
	os_ref_retain_locked(&bp->nb_refs);
}

/*
 * release a reference on a buffer
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_refrele(struct nfsbuf *bp)
{
	(void) os_ref_release_locked(&bp->nb_refs);
}
/*
 * mark a particular buffer as BUSY
 * (must be called with nfs_buf_mutex held)
 */
errno_t
nfs_buf_acquire(struct nfsbuf *bp, int flags, int slpflag, int slptimeo)
{
	errno_t error;
	struct timespec ts;

	if (ISSET(bp->nb_lflags, NBL_BUSY)) {
		/*
		 * since the lck_mtx_lock may block, the buffer
		 * may become BUSY, so we need to recheck for
		 * a NOWAIT request
		 */
		if (flags & NBAC_NOWAIT) {
			return EBUSY;
		}
		SET(bp->nb_lflags, NBL_WANTED);

		ts.tv_sec = (slptimeo / 100);
		/* the hz value is 100, which leads to 10ms ticks */
		ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000;

		error = msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1),
		    "nfs_buf_acquire", &ts);
		if (error) {
			return error;
		}
		return EAGAIN;
	}
	if (flags & NBAC_REMOVE) {
		nfs_buf_remfree(bp);
	}
	SET(bp->nb_lflags, NBL_BUSY);

	return 0;
}
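/*
 * Added note on the slptimeo conversion above: slptimeo is expressed in
 * ticks with hz == 100, so each tick is 10ms.  For example, slptimeo == 250
 * becomes ts.tv_sec == 2 and ts.tv_nsec == 50 * 10 * NSEC_PER_USEC * 1000,
 * i.e. 500ms, for a total wait of 2.5 seconds.
 */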
/*
 * simply drop the BUSY status of a buffer
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_drop(struct nfsbuf *bp)
{
	int need_wakeup = 0;

	if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
		panic("nfs_buf_drop: buffer not busy!");
	}
	if (ISSET(bp->nb_lflags, NBL_WANTED)) {
		/* delay the actual wakeup until after we clear NBL_BUSY */
		need_wakeup = 1;
	}
	/* Unlock the buffer. */
	CLR(bp->nb_lflags, (NBL_BUSY | NBL_WANTED));

	if (need_wakeup) {
		wakeup(bp);
	}
}
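/*
 * Added note: nfs_buf_acquire()/nfs_buf_drop() implement the NBL_BUSY
 * ownership protocol used throughout this file -- a thread that finds the
 * buffer busy sets NBL_WANTED and sleeps on the buffer address, and the
 * owner clears both flags in nfs_buf_drop() and only then issues the
 * wakeup, so the waiter re-evaluates the buffer state under nfs_buf_mutex.
 */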
1474 * prepare for iterating over an nfsnode's buffer list
1475 * this lock protects the queue manipulation
1476 * (must be called with nfs_buf_mutex held)
1479 nfs_buf_iterprepare(nfsnode_t np
, struct nfsbuflists
*iterheadp
, int flags
)
1481 struct nfsbuflists
*listheadp
;
1483 if (flags
& NBI_DIRTY
) {
1484 listheadp
= &np
->n_dirtyblkhd
;
1486 listheadp
= &np
->n_cleanblkhd
;
1489 if ((flags
& NBI_NOWAIT
) && (np
->n_bufiterflags
& NBI_ITER
)) {
1490 LIST_INIT(iterheadp
);
1494 while (np
->n_bufiterflags
& NBI_ITER
) {
1495 np
->n_bufiterflags
|= NBI_ITERWANT
;
1496 msleep(&np
->n_bufiterflags
, nfs_buf_mutex
, 0, "nfs_buf_iterprepare", NULL
);
1498 if (LIST_EMPTY(listheadp
)) {
1499 LIST_INIT(iterheadp
);
1502 np
->n_bufiterflags
|= NBI_ITER
;
1504 iterheadp
->lh_first
= listheadp
->lh_first
;
1505 listheadp
->lh_first
->nb_vnbufs
.le_prev
= &iterheadp
->lh_first
;
1506 LIST_INIT(listheadp
);
1512 * clean up after iterating over an nfsnode's buffer list
1513 * this lock protects the queue manipulation
1514 * (must be called with nfs_buf_mutex held)
1517 nfs_buf_itercomplete(nfsnode_t np
, struct nfsbuflists
*iterheadp
, int flags
)
1519 struct nfsbuflists
* listheadp
;
1522 if (flags
& NBI_DIRTY
) {
1523 listheadp
= &np
->n_dirtyblkhd
;
1525 listheadp
= &np
->n_cleanblkhd
;
1528 while (!LIST_EMPTY(iterheadp
)) {
1529 bp
= LIST_FIRST(iterheadp
);
1530 LIST_REMOVE(bp
, nb_vnbufs
);
1531 LIST_INSERT_HEAD(listheadp
, bp
, nb_vnbufs
);
1534 np
->n_bufiterflags
&= ~NBI_ITER
;
1535 if (np
->n_bufiterflags
& NBI_ITERWANT
) {
1536 np
->n_bufiterflags
&= ~NBI_ITERWANT
;
1537 wakeup(&np
->n_bufiterflags
);
1543 * Read an NFS buffer for a file.
1546 nfs_buf_read(struct nfsbuf
*bp
)
1554 cred
= bp
->nb_rcred
;
1555 if (IS_VALID_CRED(cred
)) {
1556 kauth_cred_ref(cred
);
1558 thd
= ISSET(bp
->nb_flags
, NB_ASYNC
) ? NULL
: current_thread();
1561 if (!ISSET(bp
->nb_flags
, NB_READ
)) {
1562 panic("nfs_buf_read: !NB_READ");
1564 if (ISSET(bp
->nb_flags
, NB_DONE
)) {
1565 CLR(bp
->nb_flags
, NB_DONE
);
1570 OSAddAtomic64(1, &nfsstats
.read_bios
);
1572 error
= nfs_buf_read_rpc(bp
, thd
, cred
);
1574 * For async I/O, the callbacks will finish up the
1575 * read. Otherwise, the read has already been finished.
1578 if (IS_VALID_CRED(cred
)) {
1579 kauth_cred_unref(&cred
);
1585 * finish the reading of a buffer
1588 nfs_buf_read_finish(struct nfsbuf
*bp
)
1590 nfsnode_t np
= bp
->nb_np
;
1591 struct nfsmount
*nmp
;
1593 if (!ISSET(bp
->nb_flags
, NB_ERROR
)) {
1594 /* update valid range */
1595 bp
->nb_validoff
= 0;
1596 bp
->nb_validend
= bp
->nb_endio
;
1597 if (bp
->nb_endio
< (int)bp
->nb_bufsize
) {
1599 * The read may be short because we have unflushed writes
1600 * that are extending the file size and the reads hit the
1601 * (old) EOF on the server. So, just make sure nb_validend
1602 * correctly tracks EOF.
1603 * Note that the missing data should have already been zeroed
1604 * in nfs_buf_read_rpc_finish().
1606 off_t boff
= NBOFF(bp
);
1607 if ((off_t
)np
->n_size
>= (boff
+ bp
->nb_bufsize
)) {
1608 bp
->nb_validend
= bp
->nb_bufsize
;
1609 } else if ((off_t
)np
->n_size
>= boff
) {
1610 bp
->nb_validend
= np
->n_size
- boff
;
1612 bp
->nb_validend
= 0;
1615 if ((nmp
= NFSTONMP(np
)) && (nmp
->nm_vers
== NFS_VER2
) &&
1616 ((NBOFF(bp
) + bp
->nb_validend
) > 0x100000000LL
)) {
1617 bp
->nb_validend
= 0x100000000LL
- NBOFF(bp
);
1619 bp
->nb_valid
= (uint32_t)(1LLU << (round_page_32(bp
->nb_validend
) / PAGE_SIZE
)) - 1;
1620 if (bp
->nb_validend
& PAGE_MASK
) {
1621 /* zero-fill remainder of last page */
1622 bzero(bp
->nb_data
+ bp
->nb_validend
, PAGE_SIZE
- (bp
->nb_validend
& PAGE_MASK
));
1629 * initiate the NFS READ RPC(s) for a buffer
1632 nfs_buf_read_rpc(struct nfsbuf
*bp
, thread_t thd
, kauth_cred_t cred
)
1634 struct nfsmount
*nmp
;
1635 nfsnode_t np
= bp
->nb_np
;
1636 int error
= 0, nfsvers
, async
;
1638 uint32_t nmrsize
, length
, len
;
1641 struct nfsreq_cbinfo cb
;
1644 if (nfs_mount_gone(nmp
)) {
1645 bp
->nb_error
= error
= ENXIO
;
1646 SET(bp
->nb_flags
, NB_ERROR
);
1650 nfsvers
= nmp
->nm_vers
;
1651 nmrsize
= nmp
->nm_rsize
;
1655 length
= bp
->nb_bufsize
;
1657 if (nfsvers
== NFS_VER2
) {
1658 if (boff
> 0xffffffffLL
) {
1659 bp
->nb_error
= error
= EFBIG
;
1660 SET(bp
->nb_flags
, NB_ERROR
);
1664 if ((boff
+ length
- 1) > 0xffffffffLL
) {
1665 length
= 0x100000000LL
- boff
;
1669 /* Note: Can only do async I/O if nfsiods are configured. */
1670 async
= (bp
->nb_flags
& NB_ASYNC
);
1671 cb
.rcb_func
= async
? nfs_buf_read_rpc_finish
: NULL
;
1674 bp
->nb_offio
= bp
->nb_endio
= 0;
1675 bp
->nb_rpcs
= nrpcs
= (length
+ nmrsize
- 1) / nmrsize
;
1676 if (async
&& (nrpcs
> 1)) {
1677 SET(bp
->nb_flags
, NB_MULTASYNCRPC
);
1679 CLR(bp
->nb_flags
, NB_MULTASYNCRPC
);
1682 while (length
> 0) {
1683 if (ISSET(bp
->nb_flags
, NB_ERROR
)) {
1684 error
= bp
->nb_error
;
1687 len
= (length
> nmrsize
) ? nmrsize
: length
;
1688 cb
.rcb_args
[0] = offset
;
1689 cb
.rcb_args
[1] = len
;
1691 if (nmp
->nm_vers
>= NFS_VER4
) {
1692 cb
.rcb_args
[2] = nmp
->nm_stategenid
;
1696 error
= nmp
->nm_funcs
->nf_read_rpc_async(np
, boff
+ offset
, len
, thd
, cred
, &cb
, &req
);
1705 nfs_buf_read_rpc_finish(req
);
1706 if (ISSET(bp
->nb_flags
, NB_ERROR
)) {
1707 error
= bp
->nb_error
;
1714 * Something bad happened while trying to send the RPC(s).
1715 * Wait for any outstanding requests to complete.
1717 bp
->nb_error
= error
;
1718 SET(bp
->nb_flags
, NB_ERROR
);
1719 if (ISSET(bp
->nb_flags
, NB_MULTASYNCRPC
)) {
1720 nrpcs
= (length
+ nmrsize
- 1) / nmrsize
;
1721 lck_mtx_lock(nfs_buf_mutex
);
1722 bp
->nb_rpcs
-= nrpcs
;
1723 if (bp
->nb_rpcs
== 0) {
1724 /* No RPCs left, so the buffer's done */
1725 lck_mtx_unlock(nfs_buf_mutex
);
1728 /* wait for the last RPC to mark it done */
1729 while (bp
->nb_rpcs
> 0) {
1730 msleep(&bp
->nb_rpcs
, nfs_buf_mutex
, 0,
1731 "nfs_buf_read_rpc_cancel", NULL
);
1733 lck_mtx_unlock(nfs_buf_mutex
);
1744 * finish up an NFS READ RPC on a buffer
1747 nfs_buf_read_rpc_finish(struct nfsreq
*req
)
1749 struct nfsmount
*nmp
;
1751 struct nfsreq_cbinfo cb
;
1753 int error
= 0, nfsvers
, offset
, length
, eof
= 0, multasyncrpc
, finished
;
1754 void *wakeme
= NULL
;
1755 struct nfsreq
*rreq
= NULL
;
1760 char uio_buf
[UIO_SIZEOF(1)];
1764 thd
= req
->r_thread
;
1766 if (IS_VALID_CRED(cred
)) {
1767 kauth_cred_ref(cred
);
1769 cb
= req
->r_callback
;
1771 if (cb
.rcb_func
) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
1772 nfs_request_ref(req
, 0);
1776 if (nfs_mount_gone(nmp
)) {
1777 SET(bp
->nb_flags
, NB_ERROR
);
1778 bp
->nb_error
= error
= ENXIO
;
1780 if (error
|| ISSET(bp
->nb_flags
, NB_ERROR
)) {
1782 nfs_request_async_cancel(req
);
1786 nfsvers
= nmp
->nm_vers
;
1787 offset
= cb
.rcb_args
[0];
1788 rlen
= length
= cb
.rcb_args
[1];
1790 auio
= uio_createwithbuffer(1, NBOFF(bp
) + offset
, UIO_SYSSPACE
,
1791 UIO_READ
, &uio_buf
, sizeof(uio_buf
));
1792 uio_addiov(auio
, CAST_USER_ADDR_T(bp
->nb_data
+ offset
), length
);
1794 /* finish the RPC */
1795 error
= nmp
->nm_funcs
->nf_read_rpc_async_finish(np
, req
, auio
, &rlen
, &eof
);
1796 if ((error
== EINPROGRESS
) && cb
.rcb_func
) {
1797 /* async request restarted */
1799 nfs_request_rele(req
);
1801 if (IS_VALID_CRED(cred
)) {
1802 kauth_cred_unref(&cred
);
1807 if ((nmp
->nm_vers
>= NFS_VER4
) && nfs_mount_state_error_should_restart(error
) && !ISSET(bp
->nb_flags
, NB_ERROR
)) {
1808 lck_mtx_lock(&nmp
->nm_lock
);
1809 if ((error
!= NFSERR_OLD_STATEID
) && (error
!= NFSERR_GRACE
) && (cb
.rcb_args
[2] == nmp
->nm_stategenid
)) {
1810 NP(np
, "nfs_buf_read_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
1811 error
, NBOFF(bp
) + offset
, cb
.rcb_args
[2], nmp
->nm_stategenid
);
1812 nfs_need_recover(nmp
, error
);
1814 lck_mtx_unlock(&nmp
->nm_lock
);
1815 if (np
->n_flag
& NREVOKE
) {
1818 if (error
== NFSERR_GRACE
) {
1821 * For an async I/O request, handle a grace delay just like
1822 * jukebox errors. Set the resend time and queue it up.
1825 if (req
->r_nmrep
.nmc_mhead
) {
1826 mbuf_freem(req
->r_nmrep
.nmc_mhead
);
1827 req
->r_nmrep
.nmc_mhead
= NULL
;
1831 lck_mtx_lock(&req
->r_mtx
);
1832 req
->r_resendtime
= now
.tv_sec
+ 2;
1833 req
->r_xid
= 0; // get a new XID
1834 req
->r_flags
|= R_RESTART
;
1836 nfs_asyncio_resend(req
);
1837 lck_mtx_unlock(&req
->r_mtx
);
1838 if (IS_VALID_CRED(cred
)) {
1839 kauth_cred_unref(&cred
);
1841 /* Note: nfsreq reference taken will be dropped later when finished */
1844 /* otherwise, just pause a couple seconds and retry */
1845 tsleep(&nmp
->nm_state
, (PZERO
- 1), "nfsgrace", 2 * hz
);
1847 if (!(error
= nfs_mount_state_wait_for_recovery(nmp
))) {
1855 SET(bp
->nb_flags
, NB_ERROR
);
1856 bp
->nb_error
= error
;
1860 if ((rlen
> 0) && (bp
->nb_endio
< (offset
+ (int)rlen
))) {
1861 bp
->nb_endio
= offset
+ rlen
;
1864 if ((nfsvers
== NFS_VER2
) || eof
|| (rlen
== 0)) {
1865 /* zero out the remaining data (up to EOF) */
1866 off_t rpcrem
, eofrem
, rem
;
1867 rpcrem
= (length
- rlen
);
1868 eofrem
= np
->n_size
- (NBOFF(bp
) + offset
+ rlen
);
1869 rem
= (rpcrem
< eofrem
) ? rpcrem
: eofrem
;
1871 bzero(bp
->nb_data
+ offset
+ rlen
, rem
);
1873 } else if (((int)rlen
< length
) && !ISSET(bp
->nb_flags
, NB_ERROR
)) {
1877 * We haven't hit EOF and we didn't get all the data
1878 * requested, so we need to issue another read for the rest.
1879 * (Don't bother if the buffer already hit an error.)
1886 cb
.rcb_args
[0] = offset
;
1887 cb
.rcb_args
[1] = length
;
1889 if (nmp
->nm_vers
>= NFS_VER4
) {
1890 cb
.rcb_args
[2] = nmp
->nm_stategenid
;
1893 error
= nmp
->nm_funcs
->nf_read_rpc_async(np
, NBOFF(bp
) + offset
, length
, thd
, cred
, &cb
, &rreq
);
1895 if (IS_VALID_CRED(cred
)) {
1896 kauth_cred_unref(&cred
);
1899 /* if !async we'll need to wait for this RPC to finish */
1904 nfs_request_rele(req
);
1907 * Outstanding RPC count is unchanged.
1908 * Callback will be called when RPC is done.
1912 SET(bp
->nb_flags
, NB_ERROR
);
1913 bp
->nb_error
= error
;
1918 nfs_request_rele(req
);
1920 if (IS_VALID_CRED(cred
)) {
1921 kauth_cred_unref(&cred
);
1925 * Decrement outstanding RPC count on buffer
1926 * and call nfs_buf_read_finish on last RPC.
1928 * (Note: when there are multiple async RPCs issued for a
1929 * buffer we need nfs_buffer_mutex to avoid problems when
1930 * aborting a partially-initiated set of RPCs)
1933 multasyncrpc
= ISSET(bp
->nb_flags
, NB_MULTASYNCRPC
);
1935 lck_mtx_lock(nfs_buf_mutex
);
1939 finished
= (bp
->nb_rpcs
== 0);
1942 lck_mtx_unlock(nfs_buf_mutex
);
1947 wakeme
= &bp
->nb_rpcs
;
1949 nfs_buf_read_finish(bp
);
1957 * Do buffer readahead.
1958 * Initiate async I/O to read buffers not in cache.
1961 nfs_buf_readahead(nfsnode_t np
, int ioflag
, daddr64_t
*rabnp
, daddr64_t lastrabn
, thread_t thd
, kauth_cred_t cred
)
1963 struct nfsmount
*nmp
= NFSTONMP(np
);
1968 if (nfs_mount_gone(nmp
)) {
1971 if (nmp
->nm_readahead
<= 0) {
1974 if (*rabnp
> lastrabn
) {
1978 for (nra
= 0; (nra
< nmp
->nm_readahead
) && (*rabnp
<= lastrabn
); nra
++, *rabnp
= *rabnp
+ 1) {
1979 /* check if block exists and is valid. */
1980 if ((*rabnp
* nmp
->nm_biosize
) >= (off_t
)np
->n_size
) {
1981 /* stop reading ahead if we're beyond EOF */
1985 error
= nfs_buf_get(np
, *rabnp
, nmp
->nm_biosize
, thd
, NBLK_READ
| NBLK_NOWAIT
, &bp
);
1989 nfs_node_lock_force(np
);
1990 np
->n_lastrahead
= *rabnp
;
1991 nfs_node_unlock(np
);
1995 if ((ioflag
& IO_NOCACHE
) && ISSET(bp
->nb_flags
, NB_CACHE
) &&
1996 !bp
->nb_dirty
&& !ISSET(bp
->nb_flags
, (NB_DELWRI
| NB_NCRDAHEAD
))) {
1997 CLR(bp
->nb_flags
, NB_CACHE
);
1999 bp
->nb_validoff
= bp
->nb_validend
= -1;
2001 if ((bp
->nb_dirtyend
<= 0) && !bp
->nb_dirty
&&
2002 !ISSET(bp
->nb_flags
, (NB_CACHE
| NB_DELWRI
))) {
2003 SET(bp
->nb_flags
, (NB_READ
| NB_ASYNC
));
2004 if (ioflag
& IO_NOCACHE
) {
2005 SET(bp
->nb_flags
, NB_NCRDAHEAD
);
2007 if (!IS_VALID_CRED(bp
->nb_rcred
) && IS_VALID_CRED(cred
)) {
2008 kauth_cred_ref(cred
);
2009 bp
->nb_rcred
= cred
;
2011 if ((error
= nfs_buf_read(bp
))) {
2016 nfs_buf_release(bp
, 1);
2022 * NFS buffer I/O for reading files.
2025 nfs_bioread(nfsnode_t np
, uio_t uio
, int ioflag
, vfs_context_t ctx
)
2027 vnode_t vp
= NFSTOV(np
);
2028 struct nfsbuf
*bp
= NULL
;
2029 struct nfsmount
*nmp
= VTONMP(vp
);
2030 daddr64_t lbn
, rabn
= 0, lastrabn
, maxrabn
= -1;
2032 int error
= 0, n
= 0, on
= 0;
2033 int nfsvers
, biosize
, modified
, readaheads
= 0;
2038 FSDBG_TOP(514, np
, uio_offset(uio
), uio_resid(uio
), ioflag
);
2040 nfsvers
= nmp
->nm_vers
;
2041 biosize
= nmp
->nm_biosize
;
2042 thd
= vfs_context_thread(ctx
);
2043 cred
= vfs_context_ucred(ctx
);
2045 if (vnode_vtype(vp
) != VREG
) {
2046 printf("nfs_bioread: type %x unexpected\n", vnode_vtype(vp
));
2047 FSDBG_BOT(514, np
, 0xd1e0016, 0, EINVAL
);
2052 * For NFS, cache consistency can only be maintained approximately.
2053 * Although RFC1094 does not specify the criteria, the following is
2054 * believed to be compatible with the reference port.
2056 * If the file has changed since the last read RPC or you have
2057 * written to the file, you may have lost data cache consistency
2058 * with the server. So, check for a change, and flush all of the
2059 * file's data out of the cache.
2060 * NB: This implies that cache data can be read when up to
2061 * NFS_MAXATTRTIMO seconds out of date. If you find that you
2062 * need current attributes, nfs_getattr() can be forced to fetch
2063 * new attributes (via NATTRINVALIDATE() or NGA_UNCACHED).
2066 if (ISSET(np
->n_flag
, NUPDATESIZE
)) {
2067 nfs_data_update_size(np
, 0);
2070 if ((error
= nfs_node_lock(np
))) {
2071 FSDBG_BOT(514, np
, 0xd1e0222, 0, error
);
2075 if (np
->n_flag
& NNEEDINVALIDATE
) {
2076 np
->n_flag
&= ~NNEEDINVALIDATE
;
2077 nfs_node_unlock(np
);
2078 error
= nfs_vinvalbuf(vp
, V_SAVE
| V_IGNORE_WRITEERR
, ctx
, 1);
2080 error
= nfs_node_lock(np
);
2083 FSDBG_BOT(514, np
, 0xd1e0322, 0, error
);
2088 modified
= (np
->n_flag
& NMODIFIED
);
2089 nfs_node_unlock(np
);
2090 /* nfs_getattr() will check changed and purge caches */
2091 error
= nfs_getattr(np
, NULL
, ctx
, modified
? NGA_UNCACHED
: NGA_CACHED
);
2093 FSDBG_BOT(514, np
, 0xd1e0004, 0, error
);
2097 if (uio_resid(uio
) == 0) {
2098 FSDBG_BOT(514, np
, 0xd1e0001, 0, 0);
2101 if (uio_offset(uio
) < 0) {
2102 FSDBG_BOT(514, np
, 0xd1e0002, 0, EINVAL
);
2107 * set up readahead - which may be limited by:
2108 * + current request length (for IO_NOCACHE)
2109 * + readahead setting
2112 if (nmp
->nm_readahead
> 0) {
2113 off_t end
= uio_offset(uio
) + uio_resid(uio
);
2114 if (end
> (off_t
)np
->n_size
) {
2117 rabn
= uio_offset(uio
) / biosize
;
2118 maxrabn
= (end
- 1) / biosize
;
2119 nfs_node_lock_force(np
);
2120 if (!(ioflag
& IO_NOCACHE
) &&
2121 (!rabn
|| (rabn
== np
->n_lastread
) || (rabn
== (np
->n_lastread
+ 1)))) {
2122 maxrabn
+= nmp
->nm_readahead
;
2123 if ((maxrabn
* biosize
) >= (off_t
)np
->n_size
) {
2124 maxrabn
= ((off_t
)np
->n_size
- 1) / biosize
;
2127 if (maxrabn
< np
->n_lastrahead
) {
2128 np
->n_lastrahead
= -1;
2130 if (rabn
< np
->n_lastrahead
) {
2131 rabn
= np
->n_lastrahead
+ 1;
2133 nfs_node_unlock(np
);
2139 nfs_data_lock(np
, NFS_DATA_LOCK_SHARED
);
2140 lbn
= uio_offset(uio
) / biosize
;
2143 * Copy directly from any cached pages without grabbing the bufs.
2144 * (If we are NOCACHE and we've issued readahead requests, we need
2145 * to grab the NB_NCRDAHEAD bufs to drop them.)
2147 if ((!(ioflag
& IO_NOCACHE
) || !readaheads
) &&
2148 ((uio
->uio_segflg
== UIO_USERSPACE32
||
2149 uio
->uio_segflg
== UIO_USERSPACE64
||
2150 uio
->uio_segflg
== UIO_USERSPACE
))) {
2151 io_resid
= uio_resid(uio
);
2152 diff
= np
->n_size
- uio_offset(uio
);
2153 if (diff
< io_resid
) {
2157 int count
= (io_resid
> INT_MAX
) ? INT_MAX
: io_resid
;
2158 error
= cluster_copy_ubc_data(vp
, uio
, &count
, 0);
2160 nfs_data_unlock(np
);
2161 FSDBG_BOT(514, np
, uio_offset(uio
), 0xcacefeed, error
);
2165 /* count any biocache reads that we just copied directly */
2166 if (lbn
!= (uio_offset(uio
) / biosize
)) {
2167 OSAddAtomic64((uio_offset(uio
) / biosize
) - lbn
, &nfsstats
.biocache_reads
);
2168 FSDBG(514, np
, 0xcacefeed, uio_offset(uio
), error
);
2172 lbn
= uio_offset(uio
) / biosize
;
2173 on
= uio_offset(uio
) % biosize
;
2174 nfs_node_lock_force(np
);
2175 np
->n_lastread
= (uio_offset(uio
) - 1) / biosize
;
2176 nfs_node_unlock(np
);
2178 if ((uio_resid(uio
) <= 0) || (uio_offset(uio
) >= (off_t
)np
->n_size
)) {
2179 nfs_data_unlock(np
);
2180 FSDBG_BOT(514, np
, uio_offset(uio
), uio_resid(uio
), 0xaaaaaaaa);
2184 /* adjust readahead block number, if necessary */
2188 lastrabn
= MIN(maxrabn
, lbn
+ nmp
->nm_readahead
);
2189 if (rabn
<= lastrabn
) { /* start readaheads */
2190 error
= nfs_buf_readahead(np
, ioflag
, &rabn
, lastrabn
, thd
, cred
);
2192 nfs_data_unlock(np
);
2193 FSDBG_BOT(514, np
, 0xd1e000b, 1, error
);
2199 OSAddAtomic64(1, &nfsstats
.biocache_reads
);
2202 * If the block is in the cache and has the required data
2203 * in a valid region, just copy it out.
2204 * Otherwise, get the block and write back/read in,
2208 io_resid
= uio_resid(uio
);
2209 n
= (io_resid
> (biosize
- on
)) ? (biosize
- on
) : io_resid
;
2210 diff
= np
->n_size
- uio_offset(uio
);
2215 error
= nfs_buf_get(np
, lbn
, biosize
, thd
, NBLK_READ
, &bp
);
2217 nfs_data_unlock(np
);
2218 FSDBG_BOT(514, np
, 0xd1e000c, 0, error
);
2222 if ((ioflag
& IO_NOCACHE
) && ISSET(bp
->nb_flags
, NB_CACHE
)) {
2224 * IO_NOCACHE found a cached buffer.
2225 * Flush the buffer if it's dirty.
2226 * Invalidate the data if it wasn't just read
2227 * in as part of a "nocache readahead".
2229 if (bp
->nb_dirty
|| (bp
->nb_dirtyend
> 0)) {
2230 /* so write the buffer out and try again */
2231 SET(bp
->nb_flags
, NB_NOCACHE
);
2234 if (ISSET(bp
->nb_flags
, NB_NCRDAHEAD
)) {
2235 CLR(bp
->nb_flags
, NB_NCRDAHEAD
);
2236 SET(bp
->nb_flags
, NB_NOCACHE
);
2240 /* if any pages are valid... */
2242 /* ...check for any invalid pages in the read range */
2243 int pg
, firstpg
, lastpg
, dirtypg
;
2244 dirtypg
= firstpg
= lastpg
= -1;
2245 pg
= on
/ PAGE_SIZE
;
2246 while (pg
<= (on
+ n
- 1) / PAGE_SIZE
) {
2247 if (!NBPGVALID(bp
, pg
)) {
2252 } else if (firstpg
>= 0 && dirtypg
< 0 && NBPGDIRTY(bp
, pg
)) {
2258 /* if there are no invalid pages, we're all set */
2260 if (bp
->nb_validoff
< 0) {
2261 /* valid range isn't set up, so */
2262 /* set it to what we know is valid */
2263 bp
->nb_validoff
= trunc_page(on
);
2264 bp
->nb_validend
= round_page(on
+ n
);
2265 nfs_buf_normalize_valid_range(np
, bp
);
2270 /* there are invalid pages in the read range */
2271 if (((dirtypg
> firstpg
) && (dirtypg
< lastpg
)) ||
2272 (((firstpg
* PAGE_SIZE
) < bp
->nb_dirtyend
) && (((lastpg
+ 1) * PAGE_SIZE
) > bp
->nb_dirtyoff
))) {
2273 /* there are also dirty page(s) (or range) in the read range, */
2274 /* so write the buffer out and try again */
2276 CLR(bp
->nb_flags
, (NB_DONE
| NB_ERROR
| NB_INVAL
));
2277 SET(bp
->nb_flags
, NB_ASYNC
);
2278 if (!IS_VALID_CRED(bp
->nb_wcred
)) {
2279 kauth_cred_ref(cred
);
2280 bp
->nb_wcred
= cred
;
2282 error
= nfs_buf_write(bp
);
2284 nfs_data_unlock(np
);
2285 FSDBG_BOT(514, np
, 0xd1e000d, 0, error
);
2290 if (!bp
->nb_dirty
&& bp
->nb_dirtyend
<= 0 &&
2291 (lastpg
- firstpg
+ 1) > (biosize
/ PAGE_SIZE
) / 2) {
2292 /* we need to read in more than half the buffer and the */
2293 /* buffer's not dirty, so just fetch the whole buffer */
2296 /* read the page range in */
2298 char uio_buf
[UIO_SIZEOF(1)];
2301 auio
= uio_createwithbuffer(1, (NBOFF(bp
) + firstpg
* PAGE_SIZE_64
),
2302 UIO_SYSSPACE
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
2306 uio_addiov(auio
, CAST_USER_ADDR_T(bp
->nb_data
+ (firstpg
* PAGE_SIZE
)),
2307 ((lastpg
- firstpg
+ 1) * PAGE_SIZE
));
    error = nfs_read_rpc(np, auio, ctx);
    if (ioflag & IO_NOCACHE) {
    SET(bp->nb_flags, NB_NOCACHE);
    nfs_buf_release(bp, 1);
    nfs_data_unlock(np);
    FSDBG_BOT(514, np, 0xd1e000e, 0, error);
    /* Make sure that the valid range is set to cover this read. */
    bp->nb_validoff = trunc_page_32(on);
    bp->nb_validend = round_page_32(on + n);
    nfs_buf_normalize_valid_range(np, bp);
    if (uio_resid(auio) > 0) {
    /* if short read, must have hit EOF, */
    /* so zero the rest of the range */
    bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
    /* mark the pages (successfully read) as valid */
    for (pg = firstpg; pg <= lastpg; pg++) {
    NBPGVALID_SET(bp, pg);
    /* if no pages are valid, read the whole block */
    if (!bp->nb_valid) {
    if (!IS_VALID_CRED(bp->nb_rcred) && IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    bp->nb_rcred = cred;
    SET(bp->nb_flags, NB_READ);
    CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
    error = nfs_buf_read(bp);
    if (ioflag & IO_NOCACHE) {
    SET(bp->nb_flags, NB_NOCACHE);
    nfs_data_unlock(np);
    nfs_buf_release(bp, 1);
    FSDBG_BOT(514, np, 0xd1e000f, 0, error);
    /* validate read range against valid range and clip */
    if (bp->nb_validend > 0) {
    diff = (on >= bp->nb_validend) ? 0 : (bp->nb_validend - on);
    error = uiomove(bp->nb_data + on, n, uio);
    nfs_buf_release(bp, 1);
    nfs_data_unlock(np);
    nfs_node_lock_force(np);
    np->n_lastread = (uio_offset(uio) - 1) / biosize;
    nfs_node_unlock(np);
    } while (error == 0 && uio_resid(uio) > 0 && n > 0);
    FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), error);
/*
 * limit the number of outstanding async I/O writes
 */
nfs_async_write_start(struct nfsmount *nmp)
    int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
    struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

    if (nfs_max_async_writes <= 0) {
    lck_mtx_lock(&nmp->nm_lock);
    while ((nfs_max_async_writes > 0) && (nmp->nm_asyncwrites >= nfs_max_async_writes)) {
    if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1))) {
    msleep(&nmp->nm_asyncwrites, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsasyncwrites", &ts);
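    /* each sleep is bounded by the 1-second ts above, so the nfs_sigintr() check at the top of the loop runs at least once per second while the writer is throttled */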
    nmp->nm_asyncwrites++;
    lck_mtx_unlock(&nmp->nm_lock);

nfs_async_write_done(struct nfsmount *nmp)
    if (nmp->nm_asyncwrites <= 0) {
    lck_mtx_lock(&nmp->nm_lock);
    if (nmp->nm_asyncwrites-- >= nfs_max_async_writes) {
    wakeup(&nmp->nm_asyncwrites);
    lck_mtx_unlock(&nmp->nm_lock);
/*
 * write (or commit) the given NFS buffer
 *
 * Commit the buffer if we can.
 * Write out any dirty range.
 * If any dirty pages remain, write them out.
 *
 * For async requests, all the work beyond sending the initial
 * write RPC is handled in the RPC callback(s).
 */
nfs_buf_write(struct nfsbuf *bp)
    int error = 0, oldflags, async;
    proc_t p = current_proc();
    int iomode, doff, dend, firstpg, lastpg;

    FSDBG_TOP(553, bp, NBOFF(bp), bp->nb_flags, 0);

    if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
    panic("nfs_buf_write: buffer is not busy???");
    async = ISSET(bp->nb_flags, NB_ASYNC);
    oldflags = bp->nb_flags;

    CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
    if (ISSET(oldflags, NB_DELWRI)) {
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeup(&nfs_nbdwrite);
    /* move to clean list */
    if (ISSET(oldflags, (NB_ASYNC | NB_DELWRI))) {
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_node_lock_force(np);
    nfs_node_unlock(np);
    vnode_startwrite(NFSTOV(np));
    if (p && p->p_stats) {
    OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
    cred = bp->nb_wcred;
    if (!IS_VALID_CRED(cred) && ISSET(bp->nb_flags, NB_READ)) {
    cred = bp->nb_rcred; /* shouldn't really happen, but... */
    if (IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    thd = async ? NULL : current_thread();

    /* We need to make sure the pages are locked before doing I/O. */
    if (!ISSET(bp->nb_flags, NB_META)) {
    if (UBCINFOEXISTS(NFSTOV(np))) {
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    error = nfs_buf_upl_setup(bp);
    printf("nfs_buf_write: upl create failed %d\n", error);
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;
    nfs_buf_upl_check(bp);
    /* We should never be in nfs_buf_write() with no UBCINFO. */
    printf("nfs_buf_write: ubcinfo already gone\n");
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;

    /* If NB_NEEDCOMMIT is set, a commit RPC may do the trick. */
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    struct nfsmount *nmp = NFSTONMP(np);
    if (nfs_mount_gone(nmp)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;
    SET(bp->nb_flags, NB_WRITEINPROG);
    error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp) + bp->nb_dirtyoff,
        bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred, bp->nb_verf);
    CLR(bp->nb_flags, NB_WRITEINPROG);
    if (error != NFSERR_STALEWRITEVERF) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    CLR(bp->nb_flags, NB_NEEDCOMMIT);
    nfs_node_lock_force(np);
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);
    if (!error && (bp->nb_dirtyend > 0)) {
    /* sanity check the dirty range */
    if (NBOFF(bp) + bp->nb_dirtyend > (off_t) np->n_size) {
    bp->nb_dirtyend = np->n_size - NBOFF(bp);
    if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    if (!error && (bp->nb_dirtyend > 0)) {
    /* there's a dirty range that needs to be written out */
    doff = bp->nb_dirtyoff;
    dend = bp->nb_dirtyend;
    /* if doff page is dirty, move doff to start of page */
    if (NBPGDIRTY(bp, doff / PAGE_SIZE)) {
    doff -= doff & PAGE_MASK;
    /* try to expand write range to include preceding dirty pages */
    if (!(doff & PAGE_MASK)) {
    while ((doff > 0) && NBPGDIRTY(bp, (doff - 1) / PAGE_SIZE)) {
    /* if dend page is dirty, move dend to start of next page */
    if ((dend & PAGE_MASK) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
    dend = round_page_32(dend);
    /* try to expand write range to include trailing dirty pages */
    if (!(dend & PAGE_MASK)) {
    while ((dend < (int)bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
    /* make sure to keep dend clipped to EOF */
    if ((NBOFF(bp) + dend) > (off_t) np->n_size) {
    dend = np->n_size - NBOFF(bp);
    /* calculate range of complete pages being written */
    firstpg = round_page_32(doff) / PAGE_SIZE;
    lastpg = (trunc_page_32(dend) - 1) / PAGE_SIZE;
    /* calculate mask for that page range */
    pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
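    /* e.g. doff == 4KB and dend == 16KB (4KB pages) give firstpg == 1, lastpg == 3, so pagemask == 0b1110: the bits for the fully written pages 1-3 */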
    /*
     * compare page mask to nb_dirty; if there are other dirty pages
     * then write FILESYNC; otherwise, write UNSTABLE if async and
     * not needcommit/stable; otherwise write FILESYNC
     */
    if (bp->nb_dirty & ~pagemask) {
    iomode = NFS_WRITE_FILESYNC;
    } else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_STABLE)) == NB_ASYNC) {
    iomode = NFS_WRITE_UNSTABLE;
    iomode = NFS_WRITE_FILESYNC;
    /* write the whole contiguous dirty range */
    bp->nb_offio = doff;
    bp->nb_endio = dend;
    OSAddAtomic64(1, &nfsstats.write_bios);
    SET(bp->nb_flags, NB_WRITEINPROG);
    error = nfs_buf_write_rpc(bp, iomode, thd, cred);
    /*
     * For async I/O, the callbacks will finish up the
     * write and push out any dirty pages. Otherwise,
     * the write has already been finished and any dirty
     */
    if (!error && bp->nb_dirty) { /* write out any dirty pages */
    error = nfs_buf_write_dirty_pages(bp, thd, cred);
    /* note: bp is still valid only for !async case */
    error = nfs_buf_iowait(bp);
    /* move to clean list */
    if (oldflags & NB_DELWRI) {
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, error);
    nfs_buf_release(bp, 1);
    /* check if we need to invalidate (and we can) */
    if ((np->n_flag & NNEEDINVALIDATE) &&
        !(np->n_bflag & (NBINVALINPROG | NBFLUSHINPROG))) {
    nfs_node_lock_force(np);
    if (np->n_flag & NNEEDINVALIDATE) {
    np->n_flag &= ~NNEEDINVALIDATE;
    nfs_node_unlock(np);
    /*
     * There was a write error and we need to
     * invalidate attrs and flush buffers in
     * order to sync up with the server.
     * (if this write was extending the file,
     * we may no longer know the correct size)
     *
     * But we couldn't call vinvalbuf while holding
     * the buffer busy. So we call vinvalbuf() after
     * releasing the buffer.
     */
    nfs_vinvalbuf2(NFSTOV(np), V_SAVE | V_IGNORE_WRITEERR, thd, cred, 1);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
/*
 * finish the writing of a buffer
 */
nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
    nfsnode_t np = bp->nb_np;
    int error = (bp->nb_flags & NB_ERROR) ? bp->nb_error : 0;
    int firstpg, lastpg;

    if ((error == EINTR) || (error == ERESTART)) {
    CLR(bp->nb_flags, NB_ERROR);
    SET(bp->nb_flags, NB_EINTR);
    /* calculate range of complete pages being written */
    firstpg = round_page_32(bp->nb_offio) / PAGE_SIZE;
    lastpg = (trunc_page_32(bp->nb_endio) - 1) / PAGE_SIZE;
    /* calculate mask for that page range written */
    pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
    /* clear dirty bits for pages we've written */
    bp->nb_dirty &= ~pagemask;
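    /* same mask trick as in nfs_buf_write(): e.g. nb_offio == 4KB, nb_endio == 16KB (4KB pages) clears dirty bits 1-3 only; pages only partially covered by the write are excluded from the mask and keep their dirty bits */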
    /* manage needcommit state */
    if (!error && (bp->nb_commitlevel == NFS_WRITE_UNSTABLE)) {
    if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_node_lock_force(np);
    np->n_needcommitcnt++;
    nfs_node_unlock(np);
    SET(bp->nb_flags, NB_NEEDCOMMIT);
    /* make sure nb_dirtyoff/nb_dirtyend reflect actual range written */
    bp->nb_dirtyoff = bp->nb_offio;
    bp->nb_dirtyend = bp->nb_endio;
    } else if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_node_lock_force(np);
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);
    CLR(bp->nb_flags, NB_NEEDCOMMIT);
    CLR(bp->nb_flags, NB_WRITEINPROG);
    /*
     * For an unstable write, the buffer is still treated as dirty until
     * a commit (or stable (re)write) is performed. Buffers needing only
     * a commit are marked with the NB_DELWRI and NB_NEEDCOMMIT flags.
     *
     * If the write was interrupted we set NB_EINTR. Don't set NB_ERROR
     * because that would cause the buffer to be dropped. The buffer is
     * still valid and simply needs to be written again.
     */
    if ((error == EINTR) || (error == ERESTART) || (!error && (bp->nb_flags & NB_NEEDCOMMIT))) {
    CLR(bp->nb_flags, NB_INVAL);
    if (!ISSET(bp->nb_flags, NB_DELWRI)) {
    SET(bp->nb_flags, NB_DELWRI);
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    /*
     * Since for the NB_ASYNC case, we've reassigned the buffer to the
     * clean list, we have to reassign it back to the dirty one. Ugh.
     */
    if (ISSET(bp->nb_flags, NB_ASYNC)) {
    /* move to dirty list */
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    /* either there's an error or we don't need to commit */
    /*
     * There was a write error and we need to invalidate
     * attrs and flush buffers in order to sync up with the
     * server. (if this write was extending the file, we
     * may no longer know the correct size)
     *
     * But we can't call vinvalbuf while holding this
     * buffer busy. Set a flag to do it after releasing
     */
    nfs_node_lock_force(np);
    np->n_error = error;
    np->n_flag |= (NWRITEERR | NNEEDINVALIDATE);
    NATTRINVALIDATE(np);
    nfs_node_unlock(np);
    /* clear the dirty range */
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    if (!error && bp->nb_dirty) {
    nfs_buf_write_dirty_pages(bp, thd, cred);
/*
 * write out any pages marked dirty in a buffer
 *
 * We do use unstable writes and follow up with a commit.
 * If we catch the write verifier changing we'll restart
 * and do the writes filesync.
 */
nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
    nfsnode_t np = bp->nb_np;
    struct nfsmount *nmp = NFSTONMP(np);
    int error = 0, commit, iomode, iomode2, len, pg, count, npages, off;
    uint32_t dirty = bp->nb_dirty;
    char uio_buf[UIO_SIZEOF(1)];

    if (!bp->nb_dirty) {
    /* there are pages marked dirty that need to be written out */
    OSAddAtomic64(1, &nfsstats.write_bios);
    SET(bp->nb_flags, NB_WRITEINPROG);
    npages = bp->nb_bufsize / PAGE_SIZE;
    iomode = NFS_WRITE_UNSTABLE;
    auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
        &uio_buf, sizeof(uio_buf));
    dirty = bp->nb_dirty;
    wverf = bp->nb_verf;
    commit = NFS_WRITE_FILESYNC;
    for (pg = 0; pg < npages; pg++) {
    if (!NBPGDIRTY(bp, pg)) {
    while (((pg + count) < npages) && NBPGDIRTY(bp, pg + count)) {
    /* write count pages starting with page pg */
    off = pg * PAGE_SIZE;
    len = count * PAGE_SIZE;
    /* clip writes to EOF */
    if (NBOFF(bp) + off + len > (off_t) np->n_size) {
    len -= (NBOFF(bp) + off + len) - np->n_size;
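    /* e.g. a dirty run starting at pg == 3 with count == 2 (4KB pages) becomes one write of off == 12KB, len == 8KB, trimmed above if it would extend past n_size */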
    uio_reset(auio, NBOFF(bp) + off, UIO_SYSSPACE, UIO_WRITE);
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + off), len);
    error = nfs_write_rpc2(np, auio, thd, cred, &iomode2, &bp->nb_verf);
    if (iomode2 < commit) { /* Retain the lowest commitment level returned. */
    if ((commit != NFS_WRITE_FILESYNC) && (wverf != bp->nb_verf)) {
    /* verifier changed, redo all the writes filesync */
    iomode = NFS_WRITE_FILESYNC;
    /* clear dirty bits */
    dirty &= ~(1 << pg);
    if (count) { /* leave pg on last page */
    CLR(bp->nb_flags, NB_WRITEINPROG);
    if (!error && (commit != NFS_WRITE_FILESYNC)) {
    error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred, wverf);
    if (error == NFSERR_STALEWRITEVERF) {
    /* verifier changed, so we need to restart all the writes */
    iomode = NFS_WRITE_FILESYNC;
    bp->nb_dirty = dirty;
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
/*
 * initiate the NFS WRITE RPC(s) for a buffer
 */
nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred)
    struct nfsmount *nmp;
    nfsnode_t np = bp->nb_np;
    int error = 0, nfsvers, async;
    uint32_t nmwsize, length, len;
    struct nfsreq_cbinfo cb;
    char uio_buf[UIO_SIZEOF(1)];

    if (nfs_mount_gone(nmp)) {
    bp->nb_error = error = ENXIO;
    SET(bp->nb_flags, NB_ERROR);
    nfsvers = nmp->nm_vers;
    nmwsize = nmp->nm_wsize;
    offset = bp->nb_offio;
    length = bp->nb_endio - bp->nb_offio;

    /* Note: Can only do async I/O if nfsiods are configured. */
    async = (bp->nb_flags & NB_ASYNC) && (NFSIOD_MAX > 0);
    bp->nb_commitlevel = NFS_WRITE_FILESYNC;
    cb.rcb_func = async ? nfs_buf_write_rpc_finish : NULL;
    if ((nfsvers == NFS_VER2) && ((NBOFF(bp) + bp->nb_endio) > 0xffffffffLL)) {
    bp->nb_error = error = EFBIG;
    SET(bp->nb_flags, NB_ERROR);
    auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
        UIO_WRITE, &uio_buf, sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
    bp->nb_rpcs = nrpcs = (length + nmwsize - 1) / nmwsize;
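    /* nrpcs is the ceiling of length / nmwsize: e.g. a 96KB dirty range with a 32KB server write size is split into 3 WRITE RPCs */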
    if (async && (nrpcs > 1)) {
    SET(bp->nb_flags, NB_MULTASYNCRPC);
    CLR(bp->nb_flags, NB_MULTASYNCRPC);
    while (length > 0) {
    if (ISSET(bp->nb_flags, NB_ERROR)) {
    error = bp->nb_error;
    len = (length > nmwsize) ? nmwsize : length;
    cb.rcb_args[0] = offset;
    cb.rcb_args[1] = len;
    if (nmp->nm_vers >= NFS_VER4) {
    cb.rcb_args[2] = nmp->nm_stategenid;
    if (async && ((error = nfs_async_write_start(nmp)))) {
    error = nmp->nm_funcs->nf_write_rpc_async(np, auio, len, thd, cred,
    nfs_async_write_done(nmp);
    nfs_buf_write_rpc_finish(req);
    /*
     * Something bad happened while trying to send the RPCs.
     * Wait for any outstanding requests to complete.
     */
    bp->nb_error = error;
    SET(bp->nb_flags, NB_ERROR);
    if (ISSET(bp->nb_flags, NB_MULTASYNCRPC)) {
    nrpcs = (length + nmwsize - 1) / nmwsize;
    lck_mtx_lock(nfs_buf_mutex);
    bp->nb_rpcs -= nrpcs;
    if (bp->nb_rpcs == 0) {
    /* No RPCs left, so the buffer's done */
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_write_finish(bp, thd, cred);
    /* wait for the last RPC to mark it done */
    while (bp->nb_rpcs > 0) {
    msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
        "nfs_buf_write_rpc_cancel", NULL);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_write_finish(bp, thd, cred);
    /* It may have just been an interrupt... that's OK */
    if (!ISSET(bp->nb_flags, NB_ERROR)) {
/*
 * finish up an NFS WRITE RPC on a buffer
 */
nfs_buf_write_rpc_finish(struct nfsreq *req)
    int error = 0, nfsvers, offset, length, multasyncrpc, finished;
    int committed = NFS_WRITE_FILESYNC;
    void *wakeme = NULL;
    struct nfsreq_cbinfo cb;
    struct nfsreq *wreq = NULL;
    struct nfsmount *nmp;
    char uio_buf[UIO_SIZEOF(1)];

    thd = req->r_thread;
    if (IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    cb = req->r_callback;
    if (cb.rcb_func) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
    nfs_request_ref(req, 0);
    if (nfs_mount_gone(nmp)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = ENXIO;
    if (error || ISSET(bp->nb_flags, NB_ERROR)) {
    nfs_request_async_cancel(req);
    nfsvers = nmp->nm_vers;
    offset = cb.rcb_args[0];
    rlen = length = cb.rcb_args[1];

    /* finish the RPC */
    error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &committed, &rlen, &wverf);
    if ((error == EINPROGRESS) && cb.rcb_func) {
    /* async request restarted */
    nfs_request_rele(req);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
    if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
    lck_mtx_lock(&nmp->nm_lock);
    if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid)) {
    NP(np, "nfs_buf_write_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
        error, NBOFF(bp) + offset, cb.rcb_args[2], nmp->nm_stategenid);
    nfs_need_recover(nmp, error);
    lck_mtx_unlock(&nmp->nm_lock);
    if (np->n_flag & NREVOKE) {
    if (error == NFSERR_GRACE) {
    /*
     * For an async I/O request, handle a grace delay just like
     * jukebox errors. Set the resend time and queue it up.
     */
    if (req->r_nmrep.nmc_mhead) {
    mbuf_freem(req->r_nmrep.nmc_mhead);
    req->r_nmrep.nmc_mhead = NULL;
    lck_mtx_lock(&req->r_mtx);
    req->r_resendtime = now.tv_sec + 2;
    req->r_xid = 0; // get a new XID
    req->r_flags |= R_RESTART;
    nfs_asyncio_resend(req);
    lck_mtx_unlock(&req->r_mtx);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
    /* Note: nfsreq reference taken will be dropped later when finished */
    /* otherwise, just pause a couple seconds and retry */
    tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
    if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
    if (error || (nfsvers == NFS_VER2)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;

    /* save lowest commit level returned */
    if (committed < bp->nb_commitlevel) {
    bp->nb_commitlevel = committed;

    /* check the write verifier */
    bp->nb_verf = wverf;
    } else if (bp->nb_verf != wverf) {
    /* verifier changed, so buffer will need to be rewritten */
    bp->nb_flags |= NB_STALEWVERF;
    bp->nb_commitlevel = NFS_WRITE_UNSTABLE;
    bp->nb_verf = wverf;
    /*
     * check for a short write
     *
     * If the server didn't write all the data, then we
     * need to issue another write for the rest of it.
     * (Don't bother if the buffer hit an error or stale wverf.)
     */
    if (((int)rlen < length) && !(bp->nb_flags & (NB_STALEWVERF | NB_ERROR))) {
    auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
        UIO_WRITE, &uio_buf, sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
    cb.rcb_args[0] = offset;
    cb.rcb_args[1] = length;
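    /* short-write recovery: when the server wrote only rlen of the requested length, a new uio is set up over the unwritten remainder at NBOFF(bp) + offset and another WRITE RPC is issued for it below */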
    if (nmp->nm_vers >= NFS_VER4) {
    cb.rcb_args[2] = nmp->nm_stategenid;
    // XXX iomode should really match the original request
    error = nmp->nm_funcs->nf_write_rpc_async(np, auio, length, thd, cred,
        NFS_WRITE_FILESYNC, &cb, &wreq);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
    /* if !async we'll need to wait for this RPC to finish */
    nfs_request_rele(req);
    /*
     * Outstanding RPC count is unchanged.
     * Callback will be called when RPC is done.
     */
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
    nfs_async_write_done(nmp);
    nfs_request_rele(req);
    /*
     * Decrement outstanding RPC count on buffer
     * and call nfs_buf_write_finish on last RPC.
     *
     * (Note: when there are multiple async RPCs issued for a
     * buffer we need nfs_buffer_mutex to avoid problems when
     * aborting a partially-initiated set of RPCs)
     */
    multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
    lck_mtx_lock(nfs_buf_mutex);
    finished = (bp->nb_rpcs == 0);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeme = &bp->nb_rpcs;
    nfs_buf_write_finish(bp, thd, cred);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
/*
 * Send commit(s) for the given node's "needcommit" buffers
 */
nfs_flushcommits(nfsnode_t np, int nowait)
    struct nfsmount *nmp;
    struct nfsbuf *bp, *prevlbp, *lbp;
    struct nfsbuflists blist, commitlist;
    int error = 0, retv, wcred_set, flags, dirty;
    u_quad_t off, endoff, toff;
    kauth_cred_t wcred = NULL;

    FSDBG_TOP(557, np, 0, 0, 0);

    /*
     * A nb_flags == (NB_DELWRI | NB_NEEDCOMMIT) block has been written to the
     * server, but has not been committed to stable storage on the server
     * yet. The byte range is worked out for as many nfsbufs as we can handle
     * and the commit rpc is done.
     */
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    error = nfs_node_lock(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    LIST_INIT(&commitlist);
    if (nfs_mount_gone(nmp)) {
    if (nmp->nm_vers == NFS_VER2) {
    flags |= NBI_NOWAIT;
    lck_mtx_lock(nfs_buf_mutex);
    wverf = nmp->nm_verf;
    if (!nfs_buf_iterprepare(np, &blist, flags)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    error = nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0);
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) != (NB_DELWRI | NB_NEEDCOMMIT)) ||
        (bp->nb_verf != wverf)) {
    nfs_buf_remfree(bp);
    /* buffer UPLs will be grabbed *in order* below */
    FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty);
    FSDBG(557, bp->nb_validoff, bp->nb_validend,
        bp->nb_dirtyoff, bp->nb_dirtyend);
    /*
     * Work out if all buffers are using the same cred
     * so we can deal with them all with one commit.
     *
     * Note: creds in bp's must be obtained by kauth_cred_ref
     * on the same original cred in order for them to be equal.
     */
    if (wcred_set == 0) {
    wcred = bp->nb_wcred;
    if (!IS_VALID_CRED(wcred)) {
    panic("nfs: needcommit w/out wcred");
    } else if ((wcred_set == 1) && wcred != bp->nb_wcred) {
    SET(bp->nb_flags, NB_WRITEINPROG);
    /*
     * Add this buffer to the list of buffers we are committing.
     * Buffers are inserted into the list in ascending order so that
     * we can take the UPLs in order after the list is complete.
     */
    LIST_FOREACH(lbp, &commitlist, nb_vnbufs) {
    if (bp->nb_lblkno < lbp->nb_lblkno) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_AFTER(prevlbp, bp, nb_vnbufs);
    LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs);
    /* update commit range start, end */
    toff = NBOFF(bp) + bp->nb_dirtyoff;
    toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff);
    if (toff > endoff) {
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    if (LIST_EMPTY(&commitlist)) {
    /*
     * We need a UPL to prevent others from accessing the buffers during
     * our commit RPC(s).
     *
     * We used to also check for dirty pages here; if there were any we'd
     * abort the commit and force the entire buffer to be written again.
     * Instead of doing that, we just go ahead and commit the dirty range,
     * and then leave the buffer around with dirty pages that will be
     * written out later.
     */
    LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    retv = nfs_buf_upl_setup(bp);
    /* Unable to create the UPL, the VM object probably no longer exists. */
    printf("nfs_flushcommits: upl create failed %d\n", retv);
    bp->nb_valid = bp->nb_dirty = 0;
    nfs_buf_upl_check(bp);
    /*
     * Commit data on the server, as required.
     * If all bufs are using the same wcred, then use that with
     * one call for all of them, otherwise commit each one
     */
    if (wcred_set == 1) {
    /*
     * Note, it's possible the commit range could be >2^32-1.
     * If it is, we'll send one commit that covers the whole file.
     */
    if ((endoff - off) > 0xffffffff) {
    count = (endoff - off);
    retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred, wverf);
    LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
    toff = NBOFF(bp) + bp->nb_dirtyoff;
    count = bp->nb_dirtyend - bp->nb_dirtyoff;
    retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred, wverf);
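    /* with a shared wcred one COMMIT covers the whole [off, endoff) range computed above; otherwise each buffer gets its own COMMIT over just its dirty range */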
    /*
     * Now, either mark the blocks I/O done or mark the
     * blocks dirty, depending on whether the commit
     */
    while ((bp = LIST_FIRST(&commitlist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty);
    nfs_node_lock_force(np);
    CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG));
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);
    /* move back to dirty list */
    lck_mtx_lock(nfs_buf_mutex);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_release(bp, 1);
    nfs_node_lock_force(np);
    nfs_node_unlock(np);
    vnode_startwrite(NFSTOV(np));
    if (ISSET(bp->nb_flags, NB_DELWRI)) {
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeup(&nfs_nbdwrite);
    CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
    /* if block still has dirty pages, we don't want it to */
    /* be released in nfs_buf_iodone(). So, don't set NB_ASYNC. */
    if (!(dirty = bp->nb_dirty)) {
    SET(bp->nb_flags, NB_ASYNC);
    CLR(bp->nb_flags, NB_ASYNC);
    /* move to clean list */
    lck_mtx_lock(nfs_buf_mutex);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    /* throw it back in as a delayed write buffer */
    CLR(bp->nb_flags, NB_DONE);
    nfs_buf_write_delayed(bp);
    FSDBG_BOT(557, np, 0, 0, error);
/*
 * Flush all the blocks associated with a vnode.
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 */
nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr)
    struct nfsbuflists blist;
    struct nfsmount *nmp = NFSTONMP(np);
    int error = 0, error2, slptimeo = 0, slpflag = 0;
    int nfsvers, flags, passone = 1;

    FSDBG_TOP(517, np, waitfor, ignore_writeerr, 0);

    if (nfs_mount_gone(nmp)) {
    nfsvers = nmp->nm_vers;
    if (NMFLAG(nmp, INTR)) {
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);
    while (np->n_bflag & NBFLUSHINPROG) {
    np->n_bflag |= NBFLUSHWANT;
    error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
    if ((error && (error != EWOULDBLOCK)) ||
        ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))) {
    lck_mtx_unlock(nfs_buf_mutex);
    np->n_bflag |= NBFLUSHINPROG;
    /*
     * On the first pass, start async/unstable writes on all
     * delayed write buffers. Then wait for all writes to complete
     * and call nfs_flushcommits() to commit any uncommitted buffers.
     * On all subsequent passes, start STABLE writes on any remaining
     * dirty buffers. Then wait for all writes to complete.
     */
    FSDBG(518, LIST_FIRST(&np->n_dirtyblkhd), np->n_flag, 0, 0);
    if (!NFSTONMP(np)) {
    lck_mtx_unlock(nfs_buf_mutex);
    /* Start/do any write(s) that are required. */
    if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    flags = (passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) ? NBAC_NOWAIT : 0;
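    /* on pass one, or when the caller isn't waiting, buffers are acquired with NBAC_NOWAIT so acquisition does not block on a busy buffer */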
    if (flags != NBAC_NOWAIT) {
    while ((error = nfs_buf_acquire(bp, flags, slpflag, slptimeo))) {
    FSDBG(524, bp, flags, bp->nb_lflags, bp->nb_flags);
    if (error == EBUSY) {
    error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
    if (flags != NBAC_NOWAIT) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    if (slpflag == PCATCH) {
    if (flags != NBAC_NOWAIT) {
    nfs_buf_refrele(bp);
    if (error == EBUSY) {
    /* buffer is no longer valid */
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (!ISSET(bp->nb_flags, NB_DELWRI)) {
    /* buffer is no longer dirty */
    FSDBG(525, bp, passone, bp->nb_lflags, bp->nb_flags);
    if ((passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) &&
        ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_remfree(bp);
    lck_mtx_unlock(nfs_buf_mutex);
    if (ISSET(bp->nb_flags, NB_ERROR)) {
    nfs_node_lock_force(np);
    np->n_error = bp->nb_error ? bp->nb_error : EIO;
    np->n_flag |= NWRITEERR;
    nfs_node_unlock(np);
    nfs_buf_release(bp, 1);
    lck_mtx_lock(nfs_buf_mutex);
    SET(bp->nb_flags, NB_ASYNC);
    /* NB_STABLE forces this to be written FILESYNC */
    SET(bp->nb_flags, NB_STABLE);
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
    while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) {
    error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
    if (slpflag == PCATCH) {
    if (nfsvers != NFS_VER2) {
    /* loop while it looks like there are still buffers to be */
    /* committed and nfs_flushcommits() seems to be handling them. */
    while (np->n_needcommitcnt) {
    if (nfs_flushcommits(np, 0)) {
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);
    if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_node_lock_force(np);
    /*
     * OK, it looks like there are no dirty blocks. If we have no
     * writes in flight and no one in the write code, we can clear
     * the modified flag. In order to make sure we see the latest
     * attributes and size, we also invalidate the attributes and
     * advance the attribute cache XID to guarantee that attributes
     * newer than our clearing of NMODIFIED will get loaded next.
     * (If we don't do this, it's possible for the flush's final
     * write/commit (xid1) to be executed in parallel with a subsequent
     * getattr request (xid2). The getattr could return attributes
     * from *before* the write/commit completed but the stale attributes
     * would be preferred because of the xid ordering.)
     */
    if (!np->n_wrbusy && !np->n_numoutput) {
    np->n_flag &= ~NMODIFIED;
    NATTRINVALIDATE(np);
    nfs_get_xid(&np->n_xid);
    nfs_node_lock_force(np);
    FSDBG(526, np->n_flag, np->n_error, 0, 0);
    if (!ignore_writeerr && (np->n_flag & NWRITEERR)) {
    error = np->n_error;
    np->n_flag &= ~NWRITEERR;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);
    flags = np->n_bflag;
    np->n_bflag &= ~(NBFLUSHINPROG | NBFLUSHWANT);
    lck_mtx_unlock(nfs_buf_mutex);
    if (flags & NBFLUSHWANT) {
    wakeup(&np->n_bflag);
    FSDBG_BOT(517, np, error, ignore_writeerr, 0);
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
nfs_vinvalbuf_internal(
    struct nfsbuflists blist;
    int list, error = 0;

    if (flags & V_SAVE) {
    if ((error = nfs_flush(np, MNT_WAIT, thd, (flags & V_IGNORE_WRITEERR)))) {
    lck_mtx_lock(nfs_buf_mutex);
    if (nfs_buf_iterprepare(np, &blist, list)) {
    if (nfs_buf_iterprepare(np, &blist, list)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    if (list == NBI_CLEAN) {
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, NBAC_REMOVE, slpflag, slptimeo))) {
    FSDBG(556, np, bp, NBOFF(bp), bp->nb_flags);
    if (error != EAGAIN) {
    FSDBG(554, np, bp, -1, error);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, list);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    FSDBG(554, np, bp, NBOFF(bp), bp->nb_flags);
    lck_mtx_unlock(nfs_buf_mutex);
    if ((flags & V_SAVE) && UBCINFOEXISTS(NFSTOV(np)) && bp->nb_np &&
        (NBOFF(bp) < (off_t)np->n_size)) {
    /* extra paranoia: make sure we're not */
    /* somehow leaving any dirty data around */
    int end = (NBOFF(bp) + bp->nb_bufsize > (off_t)np->n_size) ?
        ((off_t)np->n_size - NBOFF(bp)) : bp->nb_bufsize;
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    error = nfs_buf_upl_setup(bp);
    if (error == EINVAL) {
    /* vm object must no longer exist */
    /* hopefully we don't need to do */
    /* anything for this buffer */
    printf("nfs_vinvalbuf: upl setup failed %d\n", error);
    bp->nb_valid = bp->nb_dirty = 0;
    nfs_buf_upl_check(bp);
    /* check for any dirty data before the EOF */
    if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end)) {
    /* clip dirty range to EOF */
    if (bp->nb_dirtyend > end) {
    bp->nb_dirtyend = end;
    if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end)) {
    bp->nb_dirty &= (1 << (round_page_32(end) / PAGE_SIZE)) - 1;
    /* also make sure we'll have a credential to do the write */
    if (mustwrite && !IS_VALID_CRED(bp->nb_wcred) && !IS_VALID_CRED(cred)) {
    printf("nfs_vinvalbuf: found dirty buffer with no write creds\n");
    FSDBG(554, np, bp, 0xd00dee, bp->nb_flags);
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    panic("nfs_vinvalbuf: dirty buffer without upl");
    /* gotta write out dirty data before invalidating */
    /* (NB_STABLE indicates that data writes should be FILESYNC) */
    /* (NB_NOCACHE indicates buffer should be discarded) */
    CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC));
    SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
    if (!IS_VALID_CRED(bp->nb_wcred)) {
    kauth_cred_ref(cred);
    bp->nb_wcred = cred;
    error = nfs_buf_write(bp);
    // Note: bp has been released
    FSDBG(554, bp, 0xd00dee, 0xbad, error);
    nfs_node_lock_force(np);
    if ((error != EINTR) && (error != ERESTART)) {
    np->n_error = error;
    np->n_flag |= NWRITEERR;
    /*
     * There was a write error and we need to
     * invalidate attrs to sync with server.
     * (if this write was extending the file,
     * we may no longer know the correct size)
     */
    NATTRINVALIDATE(np);
    nfs_node_unlock(np);
    if ((error == EINTR) || (error == ERESTART)) {
    /*
     * Abort on EINTR. If we don't, we could
     * be stuck in this loop forever because
     * the buffer will continue to stay dirty.
     */
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, list);
    lck_mtx_unlock(nfs_buf_mutex);
    lck_mtx_lock(nfs_buf_mutex);
    SET(bp->nb_flags, NB_INVAL);
    // hold off on FREEUPs until we're done here
    nfs_buf_release(bp, 0);
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, list);
    if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd)) {
    panic("nfs_vinvalbuf: flush/inval failed");
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_node_lock_force(np);
    if (!(flags & V_SAVE)) {
    np->n_flag &= ~NMODIFIED;
    if (vnode_vtype(NFSTOV(np)) == VREG) {
    np->n_lastrahead = -1;
    nfs_node_unlock(np);
/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
    return nfs_vinvalbuf2(vp, flags, vfs_context_thread(ctx), vfs_context_ucred(ctx), intrflg);
nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrflg)
    nfsnode_t np = VTONFS(vp);
    struct nfsmount *nmp = VTONMP(vp);
    int error, slpflag, slptimeo, nflags, retry = 0;
    int ubcflags = UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE;
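    /* default ubc_msync() behavior: push all dirty pages, wait for the pushes to complete, then invalidate the range; UBC_PUSHALL is dropped below if the mount is gone or the pushes keep failing */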
    struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };

    FSDBG_TOP(554, np, flags, intrflg, 0);

    /*
     * If the mount is gone there is no sense in trying to write anything
     * and hanging while trying to do IO.
     */
    if (nfs_mount_gone(nmp)) {
    ubcflags &= ~UBC_PUSHALL;
    if (nmp && !NMFLAG(nmp, INTR)) {
    /* First wait for any other process doing a flush to complete. */
    lck_mtx_lock(nfs_buf_mutex);
    while (np->n_bflag & NBINVALINPROG) {
    np->n_bflag |= NBINVALWANT;
    msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
    if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
    lck_mtx_unlock(nfs_buf_mutex);
    if (np->n_bflag & NBINVALINPROG) {
    np->n_bflag |= NBINVALINPROG;
    lck_mtx_unlock(nfs_buf_mutex);

    /* Now, flush as required. */
    error = nfs_vinvalbuf_internal(np, flags, thd, cred, slpflag, 0);
    FSDBG(554, np, 0, 0, error);
    if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
    error = nfs_vinvalbuf_internal(np, flags, thd, cred, 0, slptimeo);

    /* get the pages out of vm also */
    if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) {
    if ((error = ubc_msync(vp, 0, size, NULL, ubcflags))) {
    if (error == EINVAL) {
    panic("nfs_vinvalbuf(): ubc_msync failed!, error %d", error);
    if (retry++ < 10) { /* retry invalidating a few times */
    if (retry > 1 || error == ENXIO) {
    ubcflags &= ~UBC_PUSHALL;
    printf("nfs_vinvalbuf(): ubc_msync failed!, error %d\n", error);
    lck_mtx_lock(nfs_buf_mutex);
    nflags = np->n_bflag;
    np->n_bflag &= ~(NBINVALINPROG | NBINVALWANT);
    lck_mtx_unlock(nfs_buf_mutex);
    if (nflags & NBINVALWANT) {
    wakeup(&np->n_bflag);
    FSDBG_BOT(554, np, flags, intrflg, error);
/*
 * Wait for any busy buffers to complete.
 */
nfs_wait_bufs(nfsnode_t np)
    struct nfsbuflists blist;

    lck_mtx_lock(nfs_buf_mutex);
    if (!nfs_buf_iterprepare(np, &blist, NBI_CLEAN)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
    if (error != EAGAIN) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
    if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
    if (error != EAGAIN) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
/*
 * Add an async I/O request to the mount's async I/O queue and make
 * sure that an nfsiod will service it.
 */
nfs_asyncio_finish(struct nfsreq *req)
    struct nfsmount *nmp;
    struct nfsiod *niod;

    FSDBG_TOP(552, nmp, 0, 0, 0);

    lck_mtx_lock(nfsiod_mutex);
    niod = nmp->nm_niod;

    /* grab an nfsiod if we don't have one already */
    niod = TAILQ_FIRST(&nfsiodfree);
    TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
    TAILQ_INSERT_TAIL(&nfsiodwork, niod, niod_link);
    niod->niod_nmp = nmp;
    } else if (((nfsiod_thread_count < NFSIOD_MAX) || (nfsiod_thread_count <= 0)) && (started < 4)) {
    /*
     * Try starting a new thread.
     * We may try a couple times if other callers
     * get the new threads before we do.
     */
    lck_mtx_unlock(nfsiod_mutex);
    if (!nfsiod_start()) {
    lck_mtx_lock(nfsiod_mutex);
    /*
     * If we got here while being on the resendq we need to get off. This
     * happens when the timer fires and errors out requests from nfs_sigintr
     * or we receive a reply (UDP case) while being on the resend queue so
     * we're just finishing up and are not going to be resent.
     */
    lck_mtx_lock(&req->r_mtx);
    if (req->r_flags & R_RESENDQ) {
    lck_mtx_lock(&nmp->nm_lock);
    if (req->r_rchain.tqe_next != NFSREQNOLIST) {
    NFS_BIO_DBG("Processing async request on resendq. Removing");
    TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
    req->r_rchain.tqe_next = NFSREQNOLIST;
    assert(req->r_refs > 1);
    /* Remove resendq reference */
    lck_mtx_unlock(&nmp->nm_lock);
    req->r_flags &= ~R_RESENDQ;
    lck_mtx_unlock(&req->r_mtx);

    if (req->r_achain.tqe_next == NFSREQNOLIST) {
    TAILQ_INSERT_TAIL(&nmp->nm_iodq, req, r_achain);

    /* If this mount doesn't already have an nfsiod working on it... */
    if (!nmp->nm_niod) {
    if (niod) { /* give it the nfsiod we just grabbed */
    nmp->nm_niod = niod;
    lck_mtx_unlock(nfsiod_mutex);
    } else if (nfsiod_thread_count > 0) {
    /* just queue it up on nfsiod mounts queue if needed */
    if (nmp->nm_iodlink.tqe_next == NFSNOLIST) {
    TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
    lck_mtx_unlock(nfsiod_mutex);
    printf("nfs_asyncio(): no nfsiods? %d %d (%d)\n", nfsiod_thread_count, NFSIOD_MAX, started);
    lck_mtx_unlock(nfsiod_mutex);
    /* we have no other option but to be persistent */
    lck_mtx_unlock(nfsiod_mutex);

    FSDBG_BOT(552, nmp, 0, 0, 0);
/*
 * queue up async I/O request for resend
 */
nfs_asyncio_resend(struct nfsreq *req)
    struct nfsmount *nmp = req->r_nmp;

    if (nfs_mount_gone(nmp)) {
    nfs_gss_clnt_rpcdone(req);
    lck_mtx_lock(&nmp->nm_lock);
    if (!(req->r_flags & R_RESENDQ)) {
    TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain);
    req->r_flags |= R_RESENDQ;
    /*
     * We take a reference on this request so that it can't be
     * destroyed while a resend is queued or in progress.
     */
    nfs_request_ref(req, 1);
    nfs_mount_sock_thread_wake(nmp);
    lck_mtx_unlock(&nmp->nm_lock);
/*
 * Read directory data into a buffer.
 *
 * Buffer will be filled (unless EOF is hit).
 * Buffers after this one may also be completely/partially filled.
 */
nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx)
    nfsnode_t np = bp->nb_np;
    struct nfsmount *nmp = NFSTONMP(np);

    if (nfs_mount_gone(nmp)) {
    if (nmp->nm_vers < NFS_VER4) {
    error = nfs3_readdir_rpc(np, bp, ctx);
    error = nfs4_readdir_rpc(np, bp, ctx);
    if (error && (error != NFSERR_DIRBUFDROPPED)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;

#endif /* CONFIG_NFS_CLIENT */