/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kpi_mbuf.h>

#include <sys/vmparam.h>

#include <kern/clock.h>
#include <libkern/OSAtomic.h>
#include <kern/kalloc.h>
#include <kern/thread_call.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <sys/buf_internal.h>
#include <libkern/OSAtomic.h>
#define NFS_BIO_DBG(...) NFS_DBG(NFS_FAC_BIO, 7, ## __VA_ARGS__)

kern_return_t thread_terminate(thread_t); /* XXX */

#define NFSBUFHASH(np, lbn)	\
	(&nfsbufhashtbl[((long)(np) / sizeof(*(np)) + (int)(lbn)) & nfsbufhash])
LIST_HEAD(nfsbufhashhead, nfsbuf) * nfsbufhashtbl;
struct nfsbuffreehead nfsbuffree, nfsbuffreemeta, nfsbufdelwri;
u_long nfsbufhash;
int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax;
int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer;
int nfs_buf_timer_on = 0;
thread_t nfsbufdelwrithd = NULL;

lck_grp_t *nfs_buf_lck_grp;
lck_mtx_t *nfs_buf_mutex;
#define NFSBUF_FREE_PERIOD	30	/* seconds */
#define NFSBUF_LRU_STALE	120
#define NFSBUF_META_STALE	240

/* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list */
#define LRU_TO_FREEUP			6
/* number of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list */
#define META_TO_FREEUP			3
/* total number of nfsbufs nfs_buf_freeup() should attempt to free */
#define TOTAL_TO_FREEUP			(LRU_TO_FREEUP+META_TO_FREEUP)
/* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffree list when called from timer */
#define LRU_FREEUP_FRAC_ON_TIMER	8
/* fraction of nfsbufs nfs_buf_freeup() should attempt to free from nfsbuffreemeta list when called from timer */
#define META_FREEUP_FRAC_ON_TIMER	16
/* fraction of total nfsbufs that nfsbuffreecnt should exceed before bothering to call nfs_buf_freeup() */
#define LRU_FREEUP_MIN_FRAC		4
/* fraction of total nfsbufs that nfsbuffreemetacnt should exceed before bothering to call nfs_buf_freeup() */
#define META_FREEUP_MIN_FRAC		2

#define NFS_BUF_FREEUP() \
	do { \
		/* only call nfs_buf_freeup() if it has work to do: */ \
		if (((nfsbuffreecnt > nfsbufcnt/LRU_FREEUP_MIN_FRAC) || \
		    (nfsbuffreemetacnt > nfsbufcnt/META_FREEUP_MIN_FRAC)) && \
		    ((nfsbufcnt - TOTAL_TO_FREEUP) > nfsbufmin)) \
			nfs_buf_freeup(0); \
	} while (0)
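
/*
 * Illustrative note (not part of the original source): NFS_BUF_FREEUP() is
 * meant to be a cheap gate that call sites can invoke freely after returning
 * buffers to the free lists.  It only drops into nfs_buf_freeup(0) when the
 * free or freemeta list has grown past its *_FREEUP_MIN_FRAC share of all
 * nfsbufs and freeing TOTAL_TO_FREEUP buffers would still leave more than
 * nfsbufmin allocated.
 */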
/*
 * Initialize nfsbuf lists
 */
void
nfs_nbinit(void)
{
	nfs_buf_lck_grp = lck_grp_alloc_init("nfs_buf", LCK_GRP_ATTR_NULL);
	nfs_buf_mutex = lck_mtx_alloc_init(nfs_buf_lck_grp, LCK_ATTR_NULL);

	nfsbufcnt = nfsbufmetacnt =
	nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0;
	/* size nfsbufmax to cover at most half sane_size (w/default buf size) */
	nfsbufmax = (sane_size >> PAGE_SHIFT) / (2 * (NFS_RWSIZE >> PAGE_SHIFT));
	nfsbufmetamax = nfsbufmax / 4;

	nfsbufhashtbl = hashinit(nfsbufmax / 4, M_TEMP, &nfsbufhash);
	TAILQ_INIT(&nfsbuffree);
	TAILQ_INIT(&nfsbuffreemeta);
	TAILQ_INIT(&nfsbufdelwri);
}
/*
 * Check periodically for stale/unused nfs bufs
 */
void
nfs_buf_timer(__unused void *param0, __unused void *param1)
{
	nfs_buf_freeup(1);

	lck_mtx_lock(nfs_buf_mutex);
	if (nfsbufcnt <= nfsbufmin) {
		nfs_buf_timer_on = 0;
		lck_mtx_unlock(nfs_buf_mutex);
		return;
	}
	lck_mtx_unlock(nfs_buf_mutex);

	nfs_interval_timer_start(nfs_buf_timer_call,
	    NFSBUF_FREE_PERIOD * 1000);
}
/*
 * try to free up some excess, unused nfsbufs
 */
void
nfs_buf_freeup(int timer)
{
	struct nfsbuf *fbp;
	struct timeval now;
	int count;
	struct nfsbuffreehead nfsbuffreeup;

	TAILQ_INIT(&nfsbuffreeup);

	lck_mtx_lock(nfs_buf_mutex);

	microuptime(&now);

	FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);

	count = timer ? nfsbuffreecnt / LRU_FREEUP_FRAC_ON_TIMER : LRU_TO_FREEUP;
	while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
		fbp = TAILQ_FIRST(&nfsbuffree);
		if (!fbp) {
			break;
		}
		if (fbp->nb_refs) {
			break;
		}
		if (NBUFSTAMPVALID(fbp) &&
		    (fbp->nb_timestamp + (2 * NFSBUF_LRU_STALE)) > now.tv_sec) {
			break;
		}
		nfs_buf_remfree(fbp);
		/* disassociate buffer from any nfsnode */
		if (fbp->nb_np) {
			if (fbp->nb_vnbufs.le_next != NFSNOLIST) {
				LIST_REMOVE(fbp, nb_vnbufs);
				fbp->nb_vnbufs.le_next = NFSNOLIST;
			}
			fbp->nb_np = NULL;
		}
		LIST_REMOVE(fbp, nb_hash);
		TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free);
		nfsbufcnt--;
	}

	count = timer ? nfsbuffreemetacnt / META_FREEUP_FRAC_ON_TIMER : META_TO_FREEUP;
	while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) {
		fbp = TAILQ_FIRST(&nfsbuffreemeta);
		if (!fbp) {
			break;
		}
		if (fbp->nb_refs) {
			break;
		}
		if (NBUFSTAMPVALID(fbp) &&
		    (fbp->nb_timestamp + (2 * NFSBUF_META_STALE)) > now.tv_sec) {
			break;
		}
		nfs_buf_remfree(fbp);
		/* disassociate buffer from any nfsnode */
		if (fbp->nb_np) {
			if (fbp->nb_vnbufs.le_next != NFSNOLIST) {
				LIST_REMOVE(fbp, nb_vnbufs);
				fbp->nb_vnbufs.le_next = NFSNOLIST;
			}
			fbp->nb_np = NULL;
		}
		LIST_REMOVE(fbp, nb_hash);
		TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free);
		nfsbufcnt--;
		nfsbufmetacnt--;
	}

	FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);

	lck_mtx_unlock(nfs_buf_mutex);

	while ((fbp = TAILQ_FIRST(&nfsbuffreeup))) {
		TAILQ_REMOVE(&nfsbuffreeup, fbp, nb_free);
		/* nuke any creds */
		if (IS_VALID_CRED(fbp->nb_rcred)) {
			kauth_cred_unref(&fbp->nb_rcred);
		}
		if (IS_VALID_CRED(fbp->nb_wcred)) {
			kauth_cred_unref(&fbp->nb_wcred);
		}
		/* if buf was NB_META, dump buffer */
		if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) {
			kfree(fbp->nb_data, fbp->nb_bufsize);
		}
		FREE(fbp, M_TEMP);
	}
}
/*
 * remove a buffer from the freelist
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_remfree(struct nfsbuf *bp)
{
	if (bp->nb_free.tqe_next == NFSNOLIST) {
		panic("nfsbuf not on free list");
	}
	if (ISSET(bp->nb_flags, NB_DELWRI)) {
		nfsbufdelwricnt--;
		TAILQ_REMOVE(&nfsbufdelwri, bp, nb_free);
	} else if (ISSET(bp->nb_flags, NB_META)) {
		nfsbuffreemetacnt--;
		TAILQ_REMOVE(&nfsbuffreemeta, bp, nb_free);
	} else {
		nfsbuffreecnt--;
		TAILQ_REMOVE(&nfsbuffree, bp, nb_free);
	}
	bp->nb_free.tqe_next = NFSNOLIST;
}
/*
 * check for existence of nfsbuf in cache
 */
boolean_t
nfs_buf_is_incore(nfsnode_t np, daddr64_t blkno)
{
	boolean_t rv;

	lck_mtx_lock(nfs_buf_mutex);
	if (nfs_buf_incore(np, blkno)) {
		rv = TRUE;
	} else {
		rv = FALSE;
	}
	lck_mtx_unlock(nfs_buf_mutex);
	return rv;
}
/*
 * return incore buffer (must be called with nfs_buf_mutex held)
 */
struct nfsbuf *
nfs_buf_incore(nfsnode_t np, daddr64_t blkno)
{
	/* Search hash chain */
	struct nfsbuf * bp = NFSBUFHASH(np, blkno)->lh_first;
	for (; bp != NULL; bp = bp->nb_hash.le_next) {
		if ((bp->nb_lblkno == blkno) && (bp->nb_np == np)) {
			if (!ISSET(bp->nb_flags, NB_INVAL)) {
				FSDBG(547, bp, blkno, bp->nb_flags, bp->nb_np);
				return bp;
			}
		}
	}
	return NULL;
}
/*
 * Check if it's OK to drop a page.
 *
 * Called by vnode_pager() on pageout request of non-dirty page.
 * We need to make sure that it's not part of a delayed write.
 * If it is, we can't let the VM drop it because we may need it
 * later when/if we need to write the data (again).
 */
int
nfs_buf_page_inval(vnode_t vp, off_t offset)
{
	struct nfsmount *nmp = VTONMP(vp);
	struct nfsbuf *bp;
	int error = 0;

	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}

	lck_mtx_lock(nfs_buf_mutex);
	bp = nfs_buf_incore(VTONFS(vp), (daddr64_t)(offset / nmp->nm_biosize));
	if (!bp) {
		goto out;
	}
	FSDBG(325, bp, bp->nb_flags, bp->nb_dirtyoff, bp->nb_dirtyend);
	if (ISSET(bp->nb_lflags, NBL_BUSY)) {
		error = EBUSY;
		goto out;
	}
	/*
	 * If there's a dirty range in the buffer, check to
	 * see if this page intersects with the dirty range.
	 * If it does, we can't let the pager drop the page.
	 */
	if (bp->nb_dirtyend > 0) {
		int start = offset - NBOFF(bp);
		if ((bp->nb_dirtyend > start) &&
		    (bp->nb_dirtyoff < (start + PAGE_SIZE))) {
			/*
			 * Before returning the bad news, move the
			 * buffer to the start of the delwri list and
			 * give the list a push to try to flush the
			 * buffer out.
			 */
			error = EBUSY;
			nfs_buf_remfree(bp);
			TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
			nfsbufdelwricnt++;
			nfs_buf_delwri_push(1);
		}
	}
out:
	lck_mtx_unlock(nfs_buf_mutex);
	return error;
}
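
/*
 * Worked example (illustrative, not from the original source), assuming 4KB
 * pages: for a buffer at file offset 0 with a delayed-write dirty range of
 * nb_dirtyoff = 8192, nb_dirtyend = 12288, a pageout request for offset 8192
 * computes start = 8192.  Since nb_dirtyend (12288) > start and nb_dirtyoff
 * (8192) < start + PAGE_SIZE (12288), the page intersects the dirty range,
 * so the pager is not allowed to drop it and the buffer is pushed to the
 * front of the delwri list instead.
 */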
/*
 * set up the UPL for a buffer
 * (must NOT be called with nfs_buf_mutex held)
 */
int
nfs_buf_upl_setup(struct nfsbuf *bp)
{
	kern_return_t kret;
	upl_t upl;
	int upl_flags;

	if (ISSET(bp->nb_flags, NB_PAGELIST)) {
		return 0;
	}

	upl_flags = UPL_PRECIOUS;
	if (!ISSET(bp->nb_flags, NB_READ)) {
		/*
		 * We're doing a "write", so we intend to modify
		 * the pages we're gathering.
		 */
		upl_flags |= UPL_WILL_MODIFY;
	}
	kret = ubc_create_upl_kernel(NFSTOV(bp->nb_np), NBOFF(bp), bp->nb_bufsize,
	    &upl, NULL, upl_flags, VM_KERN_MEMORY_FILE);
	if (kret == KERN_INVALID_ARGUMENT) {
		/* vm object probably doesn't exist any more */
		bp->nb_pagelist = NULL;
		return EINVAL;
	}
	if (kret != KERN_SUCCESS) {
		printf("nfs_buf_upl_setup(): failed to get pagelist %d\n", kret);
		bp->nb_pagelist = NULL;
		return EIO;
	}

	FSDBG(538, bp, NBOFF(bp), bp->nb_bufsize, bp->nb_np);

	bp->nb_pagelist = upl;
	SET(bp->nb_flags, NB_PAGELIST);
	return 0;
}
/*
 * update buffer's valid/dirty info from UBC
 * (must NOT be called with nfs_buf_mutex held)
 */
void
nfs_buf_upl_check(struct nfsbuf *bp)
{
	upl_page_info_t *pl;
	off_t filesize, fileoffset;
	int i, npages;

	if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
		return;
	}

	npages = round_page_32(bp->nb_bufsize) / PAGE_SIZE;
	filesize = ubc_getsize(NFSTOV(bp->nb_np));
	fileoffset = NBOFF(bp);
	if (fileoffset < filesize) {
		SET(bp->nb_flags, NB_CACHE);
	} else {
		CLR(bp->nb_flags, NB_CACHE);
	}

	pl = ubc_upl_pageinfo(bp->nb_pagelist);
	bp->nb_valid = bp->nb_dirty = 0;

	for (i = 0; i < npages; i++, fileoffset += PAGE_SIZE_64) {
		/* anything beyond the end of the file is not valid or dirty */
		if (fileoffset >= filesize) {
			break;
		}
		if (!upl_valid_page(pl, i)) {
			CLR(bp->nb_flags, NB_CACHE);
			continue;
		}
		NBPGVALID_SET(bp, i);
		if (upl_dirty_page(pl, i)) {
			NBPGDIRTY_SET(bp, i);
		}
	}
	fileoffset = NBOFF(bp);
	if (ISSET(bp->nb_flags, NB_CACHE)) {
		bp->nb_validoff = 0;
		bp->nb_validend = bp->nb_bufsize;
		if (fileoffset + bp->nb_validend > filesize) {
			bp->nb_validend = filesize - fileoffset;
		}
	} else {
		bp->nb_validoff = bp->nb_validend = -1;
	}
	FSDBG(539, bp, fileoffset, bp->nb_valid, bp->nb_dirty);
	FSDBG(539, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff, bp->nb_dirtyend);
}
/*
 * make sure that a buffer is mapped
 * (must NOT be called with nfs_buf_mutex held)
 */
int
nfs_buf_map(struct nfsbuf *bp)
{
	kern_return_t kret;

	if (bp->nb_data) {
		return 0;
	}
	if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
		return EINVAL;
	}

	kret = ubc_upl_map(bp->nb_pagelist, (vm_offset_t *)&(bp->nb_data));
	if (kret != KERN_SUCCESS) {
		panic("nfs_buf_map: ubc_upl_map() failed with (%d)", kret);
	}
	if (bp->nb_data == 0) {
		panic("ubc_upl_map mapped 0");
	}
	FSDBG(540, bp, bp->nb_flags, NBOFF(bp), bp->nb_data);
	return 0;
}
/*
 * normalize an nfsbuf's valid range
 *
 * the read/write code guarantees that we'll always have a valid
 * region that is an integral number of pages.  If either end
 * of the valid range isn't page-aligned, it gets corrected
 * here as we extend the valid range through all of the
 * contiguous valid pages.
 */
void
nfs_buf_normalize_valid_range(nfsnode_t np, struct nfsbuf *bp)
{
	int pg, npg;
	/* pull validoff back to start of contiguous valid page range */
	pg = bp->nb_validoff / PAGE_SIZE;
	while (pg >= 0 && NBPGVALID(bp, pg)) {
		pg--;
	}
	bp->nb_validoff = (pg + 1) * PAGE_SIZE;
	/* push validend forward to end of contiguous valid page range */
	npg = bp->nb_bufsize / PAGE_SIZE;
	pg = bp->nb_validend / PAGE_SIZE;
	while (pg < npg && NBPGVALID(bp, pg)) {
		pg++;
	}
	bp->nb_validend = pg * PAGE_SIZE;

	if (NBOFF(bp) + bp->nb_validend > (off_t)np->n_size) {
		bp->nb_validend = np->n_size % bp->nb_bufsize;
	}
}
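
/*
 * Worked example (illustrative, not from the original source), assuming 4KB
 * pages and an 8KB buffer whose two pages are both marked valid: a range of
 * nb_validoff = 100, nb_validend = 5000 is widened to nb_validoff = 0,
 * nb_validend = 8192, and is then clipped back if NBOFF(bp) + 8192 would run
 * past the nfsnode's n_size.
 */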
/*
 * process some entries on the delayed write queue
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_delwri_service(void)
{
	struct nfsbuf *bp;
	nfsnode_t np;
	int error, i = 0;

	while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) {
		np = bp->nb_np;
		nfs_buf_remfree(bp);
		nfs_buf_refget(bp);
		while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN) {
			;
		}
		nfs_buf_refrele(bp);
		if (error) {
			break;
		}
		if (!bp->nb_np) {
			/* buffer is no longer valid */
			nfs_buf_drop(bp);
			continue;
		}
		if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
			nfs_buf_check_write_verifier(np, bp);
		}
		if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
			/* put buffer at end of delwri list */
			TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
			nfsbufdelwricnt++;
			nfs_buf_drop(bp);
			lck_mtx_unlock(nfs_buf_mutex);
			nfs_flushcommits(np, 1);
		} else {
			SET(bp->nb_flags, NB_ASYNC);
			lck_mtx_unlock(nfs_buf_mutex);
			nfs_buf_write(bp);
		}
		i++;
		lck_mtx_lock(nfs_buf_mutex);
	}
}
/*
 * thread to service the delayed write queue when asked
 */
void
nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr)
{
	struct timespec ts = { 30, 0 };
	int error = 0;

	lck_mtx_lock(nfs_buf_mutex);
	while (!error) {
		nfs_buf_delwri_service();
		error = msleep(&nfsbufdelwrithd, nfs_buf_mutex, 0, "nfsbufdelwri", &ts);
	}
	nfsbufdelwrithd = NULL;
	lck_mtx_unlock(nfs_buf_mutex);
	thread_terminate(nfsbufdelwrithd);
}
/*
 * try to push out some delayed/uncommitted writes
 * ("locked" indicates whether nfs_buf_mutex is already held)
 */
void
nfs_buf_delwri_push(int locked)
{
	if (TAILQ_EMPTY(&nfsbufdelwri)) {
		return;
	}
	if (!locked) {
		lck_mtx_lock(nfs_buf_mutex);
	}
	/* wake up the delayed write service thread */
	if (nfsbufdelwrithd) {
		wakeup(&nfsbufdelwrithd);
	} else if (kernel_thread_start(nfs_buf_delwri_thread, NULL, &nfsbufdelwrithd) == KERN_SUCCESS) {
		thread_deallocate(nfsbufdelwrithd);
	}
	/* otherwise, try to do some of the work ourselves */
	if (!nfsbufdelwrithd) {
		nfs_buf_delwri_service();
	}
	if (!locked) {
		lck_mtx_unlock(nfs_buf_mutex);
	}
}
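
/*
 * Summary note (not part of the original source): buffers marked NB_DELWRI
 * end up on the nfsbufdelwri list when they are released, and
 * nfs_buf_delwri_push() either wakes the dedicated delwri thread or, if one
 * can't be started, calls nfs_buf_delwri_service() directly, which commits
 * or kicks off asynchronous writes for a handful of queued buffers at a time.
 */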
656 * Returns errno on error, 0 otherwise.
657 * Any buffer is returned in *bpp.
659 * If NBLK_ONLYVALID is set, only return buffer if found in cache.
660 * If NBLK_NOWAIT is set, don't wait for the buffer if it's marked BUSY.
662 * Check for existence of buffer in cache.
663 * Or attempt to reuse a buffer from one of the free lists.
664 * Or allocate a new buffer if we haven't already hit max allocation.
665 * Or wait for a free buffer.
667 * If available buffer found, prepare it, and return it.
669 * If the calling process is interrupted by a signal for
670 * an interruptible mount point, return EINTR.
681 vnode_t vp
= NFSTOV(np
);
682 struct nfsmount
*nmp
= VTONMP(vp
);
685 int slpflag
= PCATCH
;
686 int operation
= (flags
& NBLK_OPMASK
);
690 FSDBG_TOP(541, np
, blkno
, size
, flags
);
694 if (bufsize
> NFS_MAXBSIZE
) {
695 panic("nfs_buf_get: buffer larger than NFS_MAXBSIZE requested");
698 if (nfs_mount_gone(nmp
)) {
699 FSDBG_BOT(541, np
, blkno
, 0, ENXIO
);
703 if (!UBCINFOEXISTS(vp
)) {
704 operation
= NBLK_META
;
705 } else if (bufsize
< (uint32_t)nmp
->nm_biosize
) {
706 /* reg files should always have biosize blocks */
707 bufsize
= nmp
->nm_biosize
;
710 /* if NBLK_WRITE, check for too many delayed/uncommitted writes */
711 if ((operation
== NBLK_WRITE
) && (nfs_nbdwrite
> NFS_A_LOT_OF_DELAYED_WRITES
)) {
712 FSDBG_TOP(542, np
, blkno
, nfs_nbdwrite
, NFS_A_LOT_OF_DELAYED_WRITES
);
714 /* poke the delwri list */
715 nfs_buf_delwri_push(0);
717 /* sleep to let other threads run... */
718 tsleep(&nfs_nbdwrite
, PCATCH
, "nfs_nbdwrite", 1);
719 FSDBG_BOT(542, np
, blkno
, nfs_nbdwrite
, NFS_A_LOT_OF_DELAYED_WRITES
);
723 lck_mtx_lock(nfs_buf_mutex
);
725 /* wait for any buffer invalidation/flushing to complete */
726 while (np
->n_bflag
& NBINVALINPROG
) {
727 np
->n_bflag
|= NBINVALWANT
;
730 msleep(&np
->n_bflag
, nfs_buf_mutex
, slpflag
, "nfs_buf_get_invalwait", &ts
);
731 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
732 lck_mtx_unlock(nfs_buf_mutex
);
733 FSDBG_BOT(541, np
, blkno
, 0, error
);
736 if (np
->n_bflag
& NBINVALINPROG
) {
741 /* check for existence of nfsbuf in cache */
742 if ((bp
= nfs_buf_incore(np
, blkno
))) {
743 /* if busy, set wanted and wait */
744 if (ISSET(bp
->nb_lflags
, NBL_BUSY
)) {
745 if (flags
& NBLK_NOWAIT
) {
746 lck_mtx_unlock(nfs_buf_mutex
);
747 FSDBG_BOT(541, np
, blkno
, bp
, 0xbcbcbcbc);
750 FSDBG_TOP(543, np
, blkno
, bp
, bp
->nb_flags
);
751 SET(bp
->nb_lflags
, NBL_WANTED
);
755 msleep(bp
, nfs_buf_mutex
, slpflag
| (PRIBIO
+ 1) | PDROP
,
756 "nfsbufget", (slpflag
== PCATCH
) ? NULL
: &ts
);
758 FSDBG_BOT(543, np
, blkno
, bp
, bp
->nb_flags
);
759 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
760 FSDBG_BOT(541, np
, blkno
, 0, error
);
765 if (bp
->nb_bufsize
!= bufsize
) {
766 panic("nfsbuf size mismatch");
768 SET(bp
->nb_lflags
, NBL_BUSY
);
769 SET(bp
->nb_flags
, NB_CACHE
);
771 /* additional paranoia: */
772 if (ISSET(bp
->nb_flags
, NB_PAGELIST
)) {
773 panic("pagelist buffer was not busy");
778 if (flags
& NBLK_ONLYVALID
) {
779 lck_mtx_unlock(nfs_buf_mutex
);
780 FSDBG_BOT(541, np
, blkno
, 0, 0x0000cace);
785 * where to get a free buffer:
786 * - if meta and maxmeta reached, must reuse meta
787 * - alloc new if we haven't reached min bufs
788 * - if free lists are NOT empty
789 * - if free list is stale, use it
790 * - else if freemeta list is stale, use it
791 * - else if max bufs allocated, use least-time-to-stale
792 * - alloc new if we haven't reached max allowed
793 * - start clearing out delwri list and try again
796 if ((operation
== NBLK_META
) && (nfsbufmetacnt
>= nfsbufmetamax
)) {
797 /* if we've hit max meta buffers, must reuse a meta buffer */
798 bp
= TAILQ_FIRST(&nfsbuffreemeta
);
799 } else if ((nfsbufcnt
> nfsbufmin
) &&
800 (!TAILQ_EMPTY(&nfsbuffree
) || !TAILQ_EMPTY(&nfsbuffreemeta
))) {
801 /* try to pull an nfsbuf off a free list */
802 struct nfsbuf
*lrubp
, *metabp
;
806 /* if the next LRU or META buffer is invalid or stale, use it */
807 lrubp
= TAILQ_FIRST(&nfsbuffree
);
808 if (lrubp
&& (!NBUFSTAMPVALID(lrubp
) ||
809 ((lrubp
->nb_timestamp
+ NFSBUF_LRU_STALE
) < now
.tv_sec
))) {
812 metabp
= TAILQ_FIRST(&nfsbuffreemeta
);
813 if (!bp
&& metabp
&& (!NBUFSTAMPVALID(metabp
) ||
814 ((metabp
->nb_timestamp
+ NFSBUF_META_STALE
) < now
.tv_sec
))) {
818 if (!bp
&& (nfsbufcnt
>= nfsbufmax
)) {
819 /* we've already allocated all bufs, so */
820 /* choose the buffer that'll go stale first */
826 int32_t lru_stale_time
, meta_stale_time
;
827 lru_stale_time
= lrubp
->nb_timestamp
+ NFSBUF_LRU_STALE
;
828 meta_stale_time
= metabp
->nb_timestamp
+ NFSBUF_META_STALE
;
829 if (lru_stale_time
<= meta_stale_time
) {
839 /* we have a buffer to reuse */
840 FSDBG(544, np
, blkno
, bp
, bp
->nb_flags
);
842 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
843 panic("nfs_buf_get: delwri");
845 SET(bp
->nb_lflags
, NBL_BUSY
);
846 /* disassociate buffer from previous nfsnode */
848 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
849 LIST_REMOVE(bp
, nb_vnbufs
);
850 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
854 LIST_REMOVE(bp
, nb_hash
);
855 /* nuke any creds we're holding */
856 if (IS_VALID_CRED(bp
->nb_rcred
)) {
857 kauth_cred_unref(&bp
->nb_rcred
);
859 if (IS_VALID_CRED(bp
->nb_wcred
)) {
860 kauth_cred_unref(&bp
->nb_wcred
);
862 /* if buf will no longer be NB_META, dump old buffer */
863 if (operation
== NBLK_META
) {
864 if (!ISSET(bp
->nb_flags
, NB_META
)) {
867 } else if (ISSET(bp
->nb_flags
, NB_META
)) {
869 kfree(bp
->nb_data
, bp
->nb_bufsize
);
874 /* re-init buf fields */
876 bp
->nb_validoff
= bp
->nb_validend
= -1;
877 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
882 /* no buffer to reuse */
883 if ((nfsbufcnt
< nfsbufmax
) &&
884 ((operation
!= NBLK_META
) || (nfsbufmetacnt
< nfsbufmetamax
))) {
885 /* just alloc a new one */
886 MALLOC(bp
, struct nfsbuf
*, sizeof(struct nfsbuf
), M_TEMP
, M_WAITOK
);
888 lck_mtx_unlock(nfs_buf_mutex
);
889 FSDBG_BOT(541, np
, blkno
, 0, error
);
895 * If any excess bufs, make sure the timer
896 * is running to free them up later.
898 if (nfsbufcnt
> nfsbufmin
&& !nfs_buf_timer_on
) {
899 nfs_buf_timer_on
= 1;
900 nfs_interval_timer_start(nfs_buf_timer_call
,
901 NFSBUF_FREE_PERIOD
* 1000);
904 if (operation
== NBLK_META
) {
909 bzero(bp
, sizeof(*bp
));
910 bp
->nb_free
.tqe_next
= NFSNOLIST
;
911 bp
->nb_validoff
= bp
->nb_validend
= -1;
912 FSDBG(545, np
, blkno
, bp
, 0);
914 /* too many bufs... wait for buffers to free up */
915 FSDBG_TOP(546, np
, blkno
, nfsbufcnt
, nfsbufmax
);
917 /* poke the delwri list */
918 nfs_buf_delwri_push(1);
921 msleep(&nfsneedbuffer
, nfs_buf_mutex
, PCATCH
| PDROP
, "nfsbufget", NULL
);
922 FSDBG_BOT(546, np
, blkno
, nfsbufcnt
, nfsbufmax
);
923 if ((error
= nfs_sigintr(VTONMP(vp
), NULL
, thd
, 0))) {
924 FSDBG_BOT(541, np
, blkno
, 0, error
);
932 SET(bp
->nb_lflags
, NBL_BUSY
);
934 bp
->nb_lblkno
= blkno
;
935 /* insert buf in hash */
936 LIST_INSERT_HEAD(NFSBUFHASH(np
, blkno
), bp
, nb_hash
);
937 /* associate buffer with new nfsnode */
939 LIST_INSERT_HEAD(&np
->n_cleanblkhd
, bp
, nb_vnbufs
);
944 lck_mtx_unlock(nfs_buf_mutex
);
948 SET(bp
->nb_flags
, NB_META
);
949 if ((bp
->nb_bufsize
!= bufsize
) && bp
->nb_data
) {
950 kfree(bp
->nb_data
, bp
->nb_bufsize
);
952 bp
->nb_validoff
= bp
->nb_validend
= -1;
953 bp
->nb_dirtyoff
= bp
->nb_dirtyend
= 0;
956 CLR(bp
->nb_flags
, NB_CACHE
);
959 bp
->nb_data
= kalloc(bufsize
);
962 /* Ack! couldn't allocate the data buffer! */
963 /* clean up buffer and return error */
964 lck_mtx_lock(nfs_buf_mutex
);
965 LIST_REMOVE(bp
, nb_vnbufs
);
966 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
968 /* invalidate usage timestamp to allow immediate freeing */
969 NBUFSTAMPINVALIDATE(bp
);
970 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
971 panic("nfsbuf on freelist");
973 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
975 lck_mtx_unlock(nfs_buf_mutex
);
976 FSDBG_BOT(541, np
, blkno
, 0xb00, ENOMEM
);
979 bp
->nb_bufsize
= bufsize
;
985 * Set or clear NB_READ now to let the UPL subsystem know
986 * if we intend to modify the pages or not.
988 if (operation
== NBLK_READ
) {
989 SET(bp
->nb_flags
, NB_READ
);
991 CLR(bp
->nb_flags
, NB_READ
);
993 if (bufsize
< PAGE_SIZE
) {
996 bp
->nb_bufsize
= bufsize
;
997 bp
->nb_validoff
= bp
->nb_validend
= -1;
999 if (UBCINFOEXISTS(vp
)) {
1001 if (nfs_buf_upl_setup(bp
)) {
1002 /* unable to create upl */
1003 /* vm object must no longer exist */
1004 /* clean up buffer and return error */
1005 lck_mtx_lock(nfs_buf_mutex
);
1006 LIST_REMOVE(bp
, nb_vnbufs
);
1007 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
1009 /* invalidate usage timestamp to allow immediate freeing */
1010 NBUFSTAMPINVALIDATE(bp
);
1011 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1012 panic("nfsbuf on freelist");
1014 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
1016 lck_mtx_unlock(nfs_buf_mutex
);
1017 FSDBG_BOT(541, np
, blkno
, 0x2bc, EIO
);
1020 nfs_buf_upl_check(bp
);
1025 panic("nfs_buf_get: %d unknown operation", operation
);
1030 FSDBG_BOT(541, np
, blkno
, bp
, bp
->nb_flags
);
1036 nfs_buf_release(struct nfsbuf
*bp
, int freeup
)
1038 nfsnode_t np
= bp
->nb_np
;
1041 int wakeup_needbuffer
, wakeup_buffer
, wakeup_nbdwrite
;
1043 FSDBG_TOP(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
1044 FSDBG(548, bp
->nb_validoff
, bp
->nb_validend
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
);
1045 FSDBG(548, bp
->nb_valid
, 0, bp
->nb_dirty
, 0);
1047 vp
= np
? NFSTOV(np
) : NULL
;
1048 if (vp
&& UBCINFOEXISTS(vp
) && bp
->nb_bufsize
) {
1053 if (!ISSET(bp
->nb_flags
, NB_PAGELIST
) && !ISSET(bp
->nb_flags
, NB_INVAL
)) {
1054 rv
= nfs_buf_upl_setup(bp
);
1056 printf("nfs_buf_release: upl create failed %d\n", rv
);
1058 nfs_buf_upl_check(bp
);
1061 upl
= bp
->nb_pagelist
;
1063 goto pagelist_cleanup_done
;
1066 if (ubc_upl_unmap(upl
) != KERN_SUCCESS
) {
1067 panic("ubc_upl_unmap failed");
1072 * Abort the pages on error or: if this is an invalid or
1073 * non-needcommit nocache buffer AND no pages are dirty.
1075 if (ISSET(bp
->nb_flags
, NB_ERROR
) || (!bp
->nb_dirty
&& (ISSET(bp
->nb_flags
, NB_INVAL
) ||
1076 (ISSET(bp
->nb_flags
, NB_NOCACHE
) && !ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
)))))) {
1077 if (ISSET(bp
->nb_flags
, (NB_READ
| NB_INVAL
| NB_NOCACHE
))) {
1078 upl_flags
= UPL_ABORT_DUMP_PAGES
;
1082 ubc_upl_abort(upl
, upl_flags
);
1083 goto pagelist_cleanup_done
;
1085 for (i
= 0; i
<= (bp
->nb_bufsize
- 1) / PAGE_SIZE
; i
++) {
1086 if (!NBPGVALID(bp
, i
)) {
1087 ubc_upl_abort_range(upl
,
1088 i
* PAGE_SIZE
, PAGE_SIZE
,
1089 UPL_ABORT_DUMP_PAGES
|
1090 UPL_ABORT_FREE_ON_EMPTY
);
1092 if (NBPGDIRTY(bp
, i
)) {
1093 upl_flags
= UPL_COMMIT_SET_DIRTY
;
1095 upl_flags
= UPL_COMMIT_CLEAR_DIRTY
;
1098 if (!ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
))) {
1099 upl_flags
|= UPL_COMMIT_CLEAR_PRECIOUS
;
1102 ubc_upl_commit_range(upl
,
1103 i
* PAGE_SIZE
, PAGE_SIZE
,
1105 UPL_COMMIT_INACTIVATE
|
1106 UPL_COMMIT_FREE_ON_EMPTY
);
1109 pagelist_cleanup_done
:
1110 /* invalidate any pages past EOF */
1111 if (NBOFF(bp
) + bp
->nb_bufsize
> (off_t
)(np
->n_size
)) {
1113 start
= trunc_page_64(np
->n_size
) + PAGE_SIZE_64
;
1114 end
= trunc_page_64(NBOFF(bp
) + bp
->nb_bufsize
);
1115 if (start
< NBOFF(bp
)) {
1119 if ((rv
= ubc_msync(vp
, start
, end
, NULL
, UBC_INVALIDATE
))) {
1120 printf("nfs_buf_release(): ubc_msync failed!, error %d\n", rv
);
1124 CLR(bp
->nb_flags
, NB_PAGELIST
);
1125 bp
->nb_pagelist
= NULL
;
1128 lck_mtx_lock(nfs_buf_mutex
);
1130 wakeup_needbuffer
= wakeup_buffer
= wakeup_nbdwrite
= 0;
1132 /* Wake up any processes waiting for any buffer to become free. */
1133 if (nfsneedbuffer
) {
1135 wakeup_needbuffer
= 1;
1137 /* Wake up any processes waiting for _this_ buffer to become free. */
1138 if (ISSET(bp
->nb_lflags
, NBL_WANTED
)) {
1139 CLR(bp
->nb_lflags
, NBL_WANTED
);
1143 /* If it's non-needcommit nocache, or an error, mark it invalid. */
1144 if (ISSET(bp
->nb_flags
, NB_ERROR
) ||
1145 (ISSET(bp
->nb_flags
, NB_NOCACHE
) && !ISSET(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_DELWRI
)))) {
1146 SET(bp
->nb_flags
, NB_INVAL
);
1149 if ((bp
->nb_bufsize
<= 0) || ISSET(bp
->nb_flags
, NB_INVAL
)) {
1150 /* If it's invalid or empty, dissociate it from its nfsnode */
1151 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
1152 LIST_REMOVE(bp
, nb_vnbufs
);
1153 bp
->nb_vnbufs
.le_next
= NFSNOLIST
;
1156 /* if this was a delayed write, wakeup anyone */
1157 /* waiting for delayed writes to complete */
1158 if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1159 CLR(bp
->nb_flags
, NB_DELWRI
);
1162 wakeup_nbdwrite
= 1;
1164 /* invalidate usage timestamp to allow immediate freeing */
1165 NBUFSTAMPINVALIDATE(bp
);
1166 /* put buffer at head of free list */
1167 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1168 panic("nfsbuf on freelist");
1170 SET(bp
->nb_flags
, NB_INVAL
);
1171 if (ISSET(bp
->nb_flags
, NB_META
)) {
1172 TAILQ_INSERT_HEAD(&nfsbuffreemeta
, bp
, nb_free
);
1173 nfsbuffreemetacnt
++;
1175 TAILQ_INSERT_HEAD(&nfsbuffree
, bp
, nb_free
);
1178 } else if (ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1179 /* put buffer at end of delwri list */
1180 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1181 panic("nfsbuf on freelist");
1183 TAILQ_INSERT_TAIL(&nfsbufdelwri
, bp
, nb_free
);
1187 /* update usage timestamp */
1189 bp
->nb_timestamp
= now
.tv_sec
;
1190 /* put buffer at end of free list */
1191 if (bp
->nb_free
.tqe_next
!= NFSNOLIST
) {
1192 panic("nfsbuf on freelist");
1194 if (ISSET(bp
->nb_flags
, NB_META
)) {
1195 TAILQ_INSERT_TAIL(&nfsbuffreemeta
, bp
, nb_free
);
1196 nfsbuffreemetacnt
++;
1198 TAILQ_INSERT_TAIL(&nfsbuffree
, bp
, nb_free
);
1205 /* Unlock the buffer. */
1206 CLR(bp
->nb_flags
, (NB_ASYNC
| NB_STABLE
));
1207 CLR(bp
->nb_lflags
, NBL_BUSY
);
1209 FSDBG_BOT(548, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_data
);
1211 lck_mtx_unlock(nfs_buf_mutex
);
1213 if (wakeup_needbuffer
) {
1214 wakeup(&nfsneedbuffer
);
1216 if (wakeup_buffer
) {
1219 if (wakeup_nbdwrite
) {
1220 wakeup(&nfs_nbdwrite
);
/*
 * Wait for operations on the buffer to complete.
 * When they do, extract and return the I/O's error value.
 */
int
nfs_buf_iowait(struct nfsbuf *bp)
{
	FSDBG_TOP(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	lck_mtx_lock(nfs_buf_mutex);

	while (!ISSET(bp->nb_flags, NB_DONE)) {
		msleep(bp, nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", NULL);
	}

	lck_mtx_unlock(nfs_buf_mutex);

	FSDBG_BOT(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	/* check for interruption of I/O, then errors. */
	if (ISSET(bp->nb_flags, NB_EINTR)) {
		CLR(bp->nb_flags, NB_EINTR);
		return EINTR;
	} else if (ISSET(bp->nb_flags, NB_ERROR)) {
		return bp->nb_error ? bp->nb_error : EIO;
	}
	return 0;
}
/*
 * Mark I/O complete on a buffer.
 */
void
nfs_buf_iodone(struct nfsbuf *bp)
{
	FSDBG_TOP(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);

	if (ISSET(bp->nb_flags, NB_DONE)) {
		panic("nfs_buf_iodone already");
	}

	if (!ISSET(bp->nb_flags, NB_READ)) {
		CLR(bp->nb_flags, NB_WRITEINPROG);
		/*
		 * vnode_writedone() takes care of waking up
		 * any throttled write operations
		 */
		vnode_writedone(NFSTOV(bp->nb_np));
		nfs_node_lock_force(bp->nb_np);
		bp->nb_np->n_numoutput--;
		nfs_node_unlock(bp->nb_np);
	}
	if (ISSET(bp->nb_flags, NB_ASYNC)) {	/* if async, release it */
		SET(bp->nb_flags, NB_DONE);	/* note that it's done */
		nfs_buf_release(bp, 1);
	} else {				/* or just wakeup the buffer */
		lck_mtx_lock(nfs_buf_mutex);
		SET(bp->nb_flags, NB_DONE);	/* note that it's done */
		CLR(bp->nb_lflags, NBL_WANTED);
		lck_mtx_unlock(nfs_buf_mutex);
		wakeup(bp);
	}

	FSDBG_BOT(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);
}
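
/*
 * Illustrative pairing (not part of the original source): a synchronous
 * caller blocks in nfs_buf_iowait(), sleeping on the buffer until NB_DONE is
 * set; the completion path calls nfs_buf_iodone(), which for non-async
 * buffers sets NB_DONE under nfs_buf_mutex and then issues wakeup(bp) to
 * unblock the waiter.  Async buffers skip the handshake and are released
 * directly.
 */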
1294 nfs_buf_write_delayed(struct nfsbuf
*bp
)
1296 nfsnode_t np
= bp
->nb_np
;
1298 FSDBG_TOP(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
1299 FSDBG(551, bp
, bp
->nb_dirtyoff
, bp
->nb_dirtyend
, bp
->nb_dirty
);
1302 * If the block hasn't been seen before:
1303 * (1) Mark it as having been seen,
1304 * (2) Make sure it's on its node's correct block list,
1306 if (!ISSET(bp
->nb_flags
, NB_DELWRI
)) {
1307 SET(bp
->nb_flags
, NB_DELWRI
);
1308 /* move to dirty list */
1309 lck_mtx_lock(nfs_buf_mutex
);
1312 if (bp
->nb_vnbufs
.le_next
!= NFSNOLIST
) {
1313 LIST_REMOVE(bp
, nb_vnbufs
);
1315 LIST_INSERT_HEAD(&np
->n_dirtyblkhd
, bp
, nb_vnbufs
);
1316 lck_mtx_unlock(nfs_buf_mutex
);
1320 * If the vnode has "too many" write operations in progress
1321 * wait for them to finish the IO
1323 vnode_waitforwrites(NFSTOV(np
), VNODE_ASYNC_THROTTLE
, 0, 0, "nfs_buf_write_delayed");
1325 /* the file is in a modified state, so make sure the flag's set */
1326 nfs_node_lock_force(np
);
1327 np
->n_flag
|= NMODIFIED
;
1328 nfs_node_unlock(np
);
1331 * If we have too many delayed write buffers,
1332 * just fall back to doing the async write.
1334 if (nfs_nbdwrite
< 0) {
1335 panic("nfs_buf_write_delayed: Negative nfs_nbdwrite");
1337 if (nfs_nbdwrite
> NFS_A_LOT_OF_DELAYED_WRITES
) {
1338 /* issue async write */
1339 SET(bp
->nb_flags
, NB_ASYNC
);
1341 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, bp
->nb_error
);
1345 /* Otherwise, the "write" is done, so mark and release the buffer. */
1346 SET(bp
->nb_flags
, NB_DONE
);
1347 nfs_buf_release(bp
, 1);
1348 FSDBG_BOT(551, bp
, NBOFF(bp
), bp
->nb_flags
, 0);
1353 * Check that a "needcommit" buffer can still be committed.
1354 * If the write verifier has changed, we need to clear the
1355 * the needcommit flag.
1358 nfs_buf_check_write_verifier(nfsnode_t np
, struct nfsbuf
*bp
)
1360 struct nfsmount
*nmp
;
1362 if (!ISSET(bp
->nb_flags
, NB_NEEDCOMMIT
)) {
1367 if (nfs_mount_gone(nmp
)) {
1370 if (!ISSET(bp
->nb_flags
, NB_STALEWVERF
) && (bp
->nb_verf
== nmp
->nm_verf
)) {
1374 /* write verifier changed, clear commit/wverf flags */
1375 CLR(bp
->nb_flags
, (NB_NEEDCOMMIT
| NB_STALEWVERF
));
1377 nfs_node_lock_force(np
);
1378 np
->n_needcommitcnt
--;
1379 CHECK_NEEDCOMMITCNT(np
);
1380 nfs_node_unlock(np
);
/*
 * add a reference to a buffer so it doesn't disappear while being used
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_refget(struct nfsbuf *bp)
{
	bp->nb_refs++;
}
/*
 * release a reference on a buffer
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_refrele(struct nfsbuf *bp)
{
	bp->nb_refs--;
}
/*
 * mark a particular buffer as BUSY
 * (must be called with nfs_buf_mutex held)
 */
errno_t
nfs_buf_acquire(struct nfsbuf *bp, int flags, int slpflag, int slptimeo)
{
	errno_t error;
	struct timespec ts;

	if (ISSET(bp->nb_lflags, NBL_BUSY)) {
		/*
		 * since the lck_mtx_lock may block, the buffer
		 * may become BUSY, so we need to recheck for
		 * a NOWAIT request
		 */
		if (flags & NBAC_NOWAIT) {
			return EBUSY;
		}
		SET(bp->nb_lflags, NBL_WANTED);

		ts.tv_sec = (slptimeo / 100);
		/* the hz value is 100; which leads to 10ms */
		ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000;

		error = msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1),
		    "nfs_buf_acquire", &ts);
		if (error) {
			return error;
		}
		return EAGAIN;
	}
	if (flags & NBAC_REMOVE) {
		nfs_buf_remfree(bp);
	}
	SET(bp->nb_lflags, NBL_BUSY);

	return 0;
}
/*
 * simply drop the BUSY status of a buffer
 * (must be called with nfs_buf_mutex held)
 */
void
nfs_buf_drop(struct nfsbuf *bp)
{
	int need_wakeup = 0;

	if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
		panic("nfs_buf_drop: buffer not busy!");
	}
	if (ISSET(bp->nb_lflags, NBL_WANTED)) {
		/* delay the actual wakeup until after we clear NBL_BUSY */
		need_wakeup = 1;
	}
	/* Unlock the buffer. */
	CLR(bp->nb_lflags, (NBL_BUSY | NBL_WANTED));

	if (need_wakeup) {
		wakeup(bp);
	}
}
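
/*
 * Illustrative usage sketch (not part of the original source; names shown
 * are from this file): a caller typically holds nfs_buf_mutex, marks the
 * buffer busy, and later drops it again, e.g.:
 *
 *	lck_mtx_lock(nfs_buf_mutex);
 *	while ((error = nfs_buf_acquire(bp, 0, slpflag, slptimeo)) == EAGAIN)
 *		;	/- acquire slept; re-check the buffer and retry -/
 *	if (!error) {
 *		... operate on bp with NBL_BUSY set ...
 *		nfs_buf_drop(bp);
 *	}
 *	lck_mtx_unlock(nfs_buf_mutex);
 *
 * nfs_buf_delwri_service() above uses exactly this EAGAIN retry loop.
 */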
1467 * prepare for iterating over an nfsnode's buffer list
1468 * this lock protects the queue manipulation
1469 * (must be called with nfs_buf_mutex held)
1472 nfs_buf_iterprepare(nfsnode_t np
, struct nfsbuflists
*iterheadp
, int flags
)
1474 struct nfsbuflists
*listheadp
;
1476 if (flags
& NBI_DIRTY
) {
1477 listheadp
= &np
->n_dirtyblkhd
;
1479 listheadp
= &np
->n_cleanblkhd
;
1482 if ((flags
& NBI_NOWAIT
) && (np
->n_bufiterflags
& NBI_ITER
)) {
1483 LIST_INIT(iterheadp
);
1487 while (np
->n_bufiterflags
& NBI_ITER
) {
1488 np
->n_bufiterflags
|= NBI_ITERWANT
;
1489 msleep(&np
->n_bufiterflags
, nfs_buf_mutex
, 0, "nfs_buf_iterprepare", NULL
);
1491 if (LIST_EMPTY(listheadp
)) {
1492 LIST_INIT(iterheadp
);
1495 np
->n_bufiterflags
|= NBI_ITER
;
1497 iterheadp
->lh_first
= listheadp
->lh_first
;
1498 listheadp
->lh_first
->nb_vnbufs
.le_prev
= &iterheadp
->lh_first
;
1499 LIST_INIT(listheadp
);
1505 * clean up after iterating over an nfsnode's buffer list
1506 * this lock protects the queue manipulation
1507 * (must be called with nfs_buf_mutex held)
1510 nfs_buf_itercomplete(nfsnode_t np
, struct nfsbuflists
*iterheadp
, int flags
)
1512 struct nfsbuflists
* listheadp
;
1515 if (flags
& NBI_DIRTY
) {
1516 listheadp
= &np
->n_dirtyblkhd
;
1518 listheadp
= &np
->n_cleanblkhd
;
1521 while (!LIST_EMPTY(iterheadp
)) {
1522 bp
= LIST_FIRST(iterheadp
);
1523 LIST_REMOVE(bp
, nb_vnbufs
);
1524 LIST_INSERT_HEAD(listheadp
, bp
, nb_vnbufs
);
1527 np
->n_bufiterflags
&= ~NBI_ITER
;
1528 if (np
->n_bufiterflags
& NBI_ITERWANT
) {
1529 np
->n_bufiterflags
&= ~NBI_ITERWANT
;
1530 wakeup(&np
->n_bufiterflags
);
1536 * Read an NFS buffer for a file.
1539 nfs_buf_read(struct nfsbuf
*bp
)
1547 cred
= bp
->nb_rcred
;
1548 if (IS_VALID_CRED(cred
)) {
1549 kauth_cred_ref(cred
);
1551 thd
= ISSET(bp
->nb_flags
, NB_ASYNC
) ? NULL
: current_thread();
1554 if (!ISSET(bp
->nb_flags
, NB_READ
)) {
1555 panic("nfs_buf_read: !NB_READ");
1557 if (ISSET(bp
->nb_flags
, NB_DONE
)) {
1558 CLR(bp
->nb_flags
, NB_DONE
);
1563 OSAddAtomic64(1, &nfsstats
.read_bios
);
1565 error
= nfs_buf_read_rpc(bp
, thd
, cred
);
1567 * For async I/O, the callbacks will finish up the
1568 * read. Otherwise, the read has already been finished.
1571 if (IS_VALID_CRED(cred
)) {
1572 kauth_cred_unref(&cred
);
1578 * finish the reading of a buffer
1581 nfs_buf_read_finish(struct nfsbuf
*bp
)
1583 nfsnode_t np
= bp
->nb_np
;
1584 struct nfsmount
*nmp
;
1586 if (!ISSET(bp
->nb_flags
, NB_ERROR
)) {
1587 /* update valid range */
1588 bp
->nb_validoff
= 0;
1589 bp
->nb_validend
= bp
->nb_endio
;
1590 if (bp
->nb_endio
< (int)bp
->nb_bufsize
) {
1592 * The read may be short because we have unflushed writes
1593 * that are extending the file size and the reads hit the
1594 * (old) EOF on the server. So, just make sure nb_validend
1595 * correctly tracks EOF.
1596 * Note that the missing data should have already been zeroed
1597 * in nfs_buf_read_rpc_finish().
1599 off_t boff
= NBOFF(bp
);
1600 if ((off_t
)np
->n_size
>= (boff
+ bp
->nb_bufsize
)) {
1601 bp
->nb_validend
= bp
->nb_bufsize
;
1602 } else if ((off_t
)np
->n_size
>= boff
) {
1603 bp
->nb_validend
= np
->n_size
- boff
;
1605 bp
->nb_validend
= 0;
1608 if ((nmp
= NFSTONMP(np
)) && (nmp
->nm_vers
== NFS_VER2
) &&
1609 ((NBOFF(bp
) + bp
->nb_validend
) > 0x100000000LL
)) {
1610 bp
->nb_validend
= 0x100000000LL
- NBOFF(bp
);
1612 bp
->nb_valid
= (1 << (round_page_32(bp
->nb_validend
) / PAGE_SIZE
)) - 1;
1613 if (bp
->nb_validend
& PAGE_MASK
) {
1614 /* zero-fill remainder of last page */
1615 bzero(bp
->nb_data
+ bp
->nb_validend
, PAGE_SIZE
- (bp
->nb_validend
& PAGE_MASK
));
1622 * initiate the NFS READ RPC(s) for a buffer
1625 nfs_buf_read_rpc(struct nfsbuf
*bp
, thread_t thd
, kauth_cred_t cred
)
1627 struct nfsmount
*nmp
;
1628 nfsnode_t np
= bp
->nb_np
;
1629 int error
= 0, nfsvers
, async
;
1631 uint32_t nmrsize
, length
, len
;
1634 struct nfsreq_cbinfo cb
;
1637 if (nfs_mount_gone(nmp
)) {
1638 bp
->nb_error
= error
= ENXIO
;
1639 SET(bp
->nb_flags
, NB_ERROR
);
1643 nfsvers
= nmp
->nm_vers
;
1644 nmrsize
= nmp
->nm_rsize
;
1648 length
= bp
->nb_bufsize
;
1650 if (nfsvers
== NFS_VER2
) {
1651 if (boff
> 0xffffffffLL
) {
1652 bp
->nb_error
= error
= EFBIG
;
1653 SET(bp
->nb_flags
, NB_ERROR
);
1657 if ((boff
+ length
- 1) > 0xffffffffLL
) {
1658 length
= 0x100000000LL
- boff
;
1662 /* Note: Can only do async I/O if nfsiods are configured. */
1663 async
= (bp
->nb_flags
& NB_ASYNC
);
1664 cb
.rcb_func
= async
? nfs_buf_read_rpc_finish
: NULL
;
1667 bp
->nb_offio
= bp
->nb_endio
= 0;
1668 bp
->nb_rpcs
= nrpcs
= (length
+ nmrsize
- 1) / nmrsize
;
1669 if (async
&& (nrpcs
> 1)) {
1670 SET(bp
->nb_flags
, NB_MULTASYNCRPC
);
1672 CLR(bp
->nb_flags
, NB_MULTASYNCRPC
);
1675 while (length
> 0) {
1676 if (ISSET(bp
->nb_flags
, NB_ERROR
)) {
1677 error
= bp
->nb_error
;
1680 len
= (length
> nmrsize
) ? nmrsize
: length
;
1681 cb
.rcb_args
[0] = offset
;
1682 cb
.rcb_args
[1] = len
;
1683 if (nmp
->nm_vers
>= NFS_VER4
) {
1684 cb
.rcb_args
[2] = nmp
->nm_stategenid
;
1687 error
= nmp
->nm_funcs
->nf_read_rpc_async(np
, boff
+ offset
, len
, thd
, cred
, &cb
, &req
);
1696 nfs_buf_read_rpc_finish(req
);
1697 if (ISSET(bp
->nb_flags
, NB_ERROR
)) {
1698 error
= bp
->nb_error
;
1705 * Something bad happened while trying to send the RPC(s).
1706 * Wait for any outstanding requests to complete.
1708 bp
->nb_error
= error
;
1709 SET(bp
->nb_flags
, NB_ERROR
);
1710 if (ISSET(bp
->nb_flags
, NB_MULTASYNCRPC
)) {
1711 nrpcs
= (length
+ nmrsize
- 1) / nmrsize
;
1712 lck_mtx_lock(nfs_buf_mutex
);
1713 bp
->nb_rpcs
-= nrpcs
;
1714 if (bp
->nb_rpcs
== 0) {
1715 /* No RPCs left, so the buffer's done */
1716 lck_mtx_unlock(nfs_buf_mutex
);
1719 /* wait for the last RPC to mark it done */
1720 while (bp
->nb_rpcs
> 0) {
1721 msleep(&bp
->nb_rpcs
, nfs_buf_mutex
, 0,
1722 "nfs_buf_read_rpc_cancel", NULL
);
1724 lck_mtx_unlock(nfs_buf_mutex
);
1735 * finish up an NFS READ RPC on a buffer
1738 nfs_buf_read_rpc_finish(struct nfsreq
*req
)
1740 struct nfsmount
*nmp
;
1742 struct nfsreq_cbinfo cb
;
1744 int error
= 0, nfsvers
, offset
, length
, eof
= 0, multasyncrpc
, finished
;
1745 void *wakeme
= NULL
;
1746 struct nfsreq
*rreq
= NULL
;
1751 char uio_buf
[UIO_SIZEOF(1)];
1755 thd
= req
->r_thread
;
1757 if (IS_VALID_CRED(cred
)) {
1758 kauth_cred_ref(cred
);
1760 cb
= req
->r_callback
;
1762 if (cb
.rcb_func
) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
1763 nfs_request_ref(req
, 0);
1767 if (nfs_mount_gone(nmp
)) {
1768 SET(bp
->nb_flags
, NB_ERROR
);
1769 bp
->nb_error
= error
= ENXIO
;
1771 if (error
|| ISSET(bp
->nb_flags
, NB_ERROR
)) {
1773 nfs_request_async_cancel(req
);
1777 nfsvers
= nmp
->nm_vers
;
1778 offset
= cb
.rcb_args
[0];
1779 rlen
= length
= cb
.rcb_args
[1];
1781 auio
= uio_createwithbuffer(1, NBOFF(bp
) + offset
, UIO_SYSSPACE
,
1782 UIO_READ
, &uio_buf
, sizeof(uio_buf
));
1783 uio_addiov(auio
, CAST_USER_ADDR_T(bp
->nb_data
+ offset
), length
);
1785 /* finish the RPC */
1786 error
= nmp
->nm_funcs
->nf_read_rpc_async_finish(np
, req
, auio
, &rlen
, &eof
);
1787 if ((error
== EINPROGRESS
) && cb
.rcb_func
) {
1788 /* async request restarted */
1790 nfs_request_rele(req
);
1792 if (IS_VALID_CRED(cred
)) {
1793 kauth_cred_unref(&cred
);
1797 if ((nmp
->nm_vers
>= NFS_VER4
) && nfs_mount_state_error_should_restart(error
) && !ISSET(bp
->nb_flags
, NB_ERROR
)) {
1798 lck_mtx_lock(&nmp
->nm_lock
);
1799 if ((error
!= NFSERR_OLD_STATEID
) && (error
!= NFSERR_GRACE
) && (cb
.rcb_args
[2] == nmp
->nm_stategenid
)) {
1800 NP(np
, "nfs_buf_read_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
1801 error
, NBOFF(bp
) + offset
, cb
.rcb_args
[2], nmp
->nm_stategenid
);
1802 nfs_need_recover(nmp
, error
);
1804 lck_mtx_unlock(&nmp
->nm_lock
);
1805 if (np
->n_flag
& NREVOKE
) {
1808 if (error
== NFSERR_GRACE
) {
1811 * For an async I/O request, handle a grace delay just like
1812 * jukebox errors. Set the resend time and queue it up.
1815 if (req
->r_nmrep
.nmc_mhead
) {
1816 mbuf_freem(req
->r_nmrep
.nmc_mhead
);
1817 req
->r_nmrep
.nmc_mhead
= NULL
;
1821 lck_mtx_lock(&req
->r_mtx
);
1822 req
->r_resendtime
= now
.tv_sec
+ 2;
1823 req
->r_xid
= 0; // get a new XID
1824 req
->r_flags
|= R_RESTART
;
1826 nfs_asyncio_resend(req
);
1827 lck_mtx_unlock(&req
->r_mtx
);
1828 if (IS_VALID_CRED(cred
)) {
1829 kauth_cred_unref(&cred
);
1831 /* Note: nfsreq reference taken will be dropped later when finished */
1834 /* otherwise, just pause a couple seconds and retry */
1835 tsleep(&nmp
->nm_state
, (PZERO
- 1), "nfsgrace", 2 * hz
);
1837 if (!(error
= nfs_mount_state_wait_for_recovery(nmp
))) {
1844 SET(bp
->nb_flags
, NB_ERROR
);
1845 bp
->nb_error
= error
;
1849 if ((rlen
> 0) && (bp
->nb_endio
< (offset
+ (int)rlen
))) {
1850 bp
->nb_endio
= offset
+ rlen
;
1853 if ((nfsvers
== NFS_VER2
) || eof
|| (rlen
== 0)) {
1854 /* zero out the remaining data (up to EOF) */
1855 off_t rpcrem
, eofrem
, rem
;
1856 rpcrem
= (length
- rlen
);
1857 eofrem
= np
->n_size
- (NBOFF(bp
) + offset
+ rlen
);
1858 rem
= (rpcrem
< eofrem
) ? rpcrem
: eofrem
;
1860 bzero(bp
->nb_data
+ offset
+ rlen
, rem
);
1862 } else if (((int)rlen
< length
) && !ISSET(bp
->nb_flags
, NB_ERROR
)) {
1866 * We haven't hit EOF and we didn't get all the data
1867 * requested, so we need to issue another read for the rest.
1868 * (Don't bother if the buffer already hit an error.)
1873 cb
.rcb_args
[0] = offset
;
1874 cb
.rcb_args
[1] = length
;
1875 if (nmp
->nm_vers
>= NFS_VER4
) {
1876 cb
.rcb_args
[2] = nmp
->nm_stategenid
;
1878 error
= nmp
->nm_funcs
->nf_read_rpc_async(np
, NBOFF(bp
) + offset
, length
, thd
, cred
, &cb
, &rreq
);
1880 if (IS_VALID_CRED(cred
)) {
1881 kauth_cred_unref(&cred
);
1884 /* if !async we'll need to wait for this RPC to finish */
1889 nfs_request_rele(req
);
1892 * Outstanding RPC count is unchanged.
1893 * Callback will be called when RPC is done.
1897 SET(bp
->nb_flags
, NB_ERROR
);
1898 bp
->nb_error
= error
;
1903 nfs_request_rele(req
);
1905 if (IS_VALID_CRED(cred
)) {
1906 kauth_cred_unref(&cred
);
1910 * Decrement outstanding RPC count on buffer
1911 * and call nfs_buf_read_finish on last RPC.
1913 * (Note: when there are multiple async RPCs issued for a
1914 * buffer we need nfs_buffer_mutex to avoid problems when
1915 * aborting a partially-initiated set of RPCs)
1918 multasyncrpc
= ISSET(bp
->nb_flags
, NB_MULTASYNCRPC
);
1920 lck_mtx_lock(nfs_buf_mutex
);
1924 finished
= (bp
->nb_rpcs
== 0);
1927 lck_mtx_unlock(nfs_buf_mutex
);
1932 wakeme
= &bp
->nb_rpcs
;
1934 nfs_buf_read_finish(bp
);
1942 * Do buffer readahead.
1943 * Initiate async I/O to read buffers not in cache.
1946 nfs_buf_readahead(nfsnode_t np
, int ioflag
, daddr64_t
*rabnp
, daddr64_t lastrabn
, thread_t thd
, kauth_cred_t cred
)
1948 struct nfsmount
*nmp
= NFSTONMP(np
);
1953 if (nfs_mount_gone(nmp
)) {
1956 if (nmp
->nm_readahead
<= 0) {
1959 if (*rabnp
> lastrabn
) {
1963 for (nra
= 0; (nra
< nmp
->nm_readahead
) && (*rabnp
<= lastrabn
); nra
++, *rabnp
= *rabnp
+ 1) {
1964 /* check if block exists and is valid. */
1965 if ((*rabnp
* nmp
->nm_biosize
) >= (off_t
)np
->n_size
) {
1966 /* stop reading ahead if we're beyond EOF */
1970 error
= nfs_buf_get(np
, *rabnp
, nmp
->nm_biosize
, thd
, NBLK_READ
| NBLK_NOWAIT
, &bp
);
1974 nfs_node_lock_force(np
);
1975 np
->n_lastrahead
= *rabnp
;
1976 nfs_node_unlock(np
);
1980 if ((ioflag
& IO_NOCACHE
) && ISSET(bp
->nb_flags
, NB_CACHE
) &&
1981 !bp
->nb_dirty
&& !ISSET(bp
->nb_flags
, (NB_DELWRI
| NB_NCRDAHEAD
))) {
1982 CLR(bp
->nb_flags
, NB_CACHE
);
1984 bp
->nb_validoff
= bp
->nb_validend
= -1;
1986 if ((bp
->nb_dirtyend
<= 0) && !bp
->nb_dirty
&&
1987 !ISSET(bp
->nb_flags
, (NB_CACHE
| NB_DELWRI
))) {
1988 SET(bp
->nb_flags
, (NB_READ
| NB_ASYNC
));
1989 if (ioflag
& IO_NOCACHE
) {
1990 SET(bp
->nb_flags
, NB_NCRDAHEAD
);
1992 if (!IS_VALID_CRED(bp
->nb_rcred
) && IS_VALID_CRED(cred
)) {
1993 kauth_cred_ref(cred
);
1994 bp
->nb_rcred
= cred
;
1996 if ((error
= nfs_buf_read(bp
))) {
2001 nfs_buf_release(bp
, 1);
2007 * NFS buffer I/O for reading files.
2010 nfs_bioread(nfsnode_t np
, uio_t uio
, int ioflag
, vfs_context_t ctx
)
2012 vnode_t vp
= NFSTOV(np
);
2013 struct nfsbuf
*bp
= NULL
;
2014 struct nfsmount
*nmp
= VTONMP(vp
);
2015 daddr64_t lbn
, rabn
= 0, lastrabn
, maxrabn
= -1;
2017 int error
= 0, n
= 0, on
= 0;
2018 int nfsvers
, biosize
, modified
, readaheads
= 0;
2023 FSDBG_TOP(514, np
, uio_offset(uio
), uio_resid(uio
), ioflag
);
2025 nfsvers
= nmp
->nm_vers
;
2026 biosize
= nmp
->nm_biosize
;
2027 thd
= vfs_context_thread(ctx
);
2028 cred
= vfs_context_ucred(ctx
);
2030 if (vnode_vtype(vp
) != VREG
) {
2031 printf("nfs_bioread: type %x unexpected\n", vnode_vtype(vp
));
2032 FSDBG_BOT(514, np
, 0xd1e0016, 0, EINVAL
);
2037 * For NFS, cache consistency can only be maintained approximately.
2038 * Although RFC1094 does not specify the criteria, the following is
2039 * believed to be compatible with the reference port.
2041 * If the file has changed since the last read RPC or you have
2042 * written to the file, you may have lost data cache consistency
2043 * with the server. So, check for a change, and flush all of the
2044 * file's data out of the cache.
2045 * NB: This implies that cache data can be read when up to
2046 * NFS_MAXATTRTIMO seconds out of date. If you find that you
2047 * need current attributes, nfs_getattr() can be forced to fetch
2048 * new attributes (via NATTRINVALIDATE() or NGA_UNCACHED).
2051 if (ISSET(np
->n_flag
, NUPDATESIZE
)) {
2052 nfs_data_update_size(np
, 0);
2055 if ((error
= nfs_node_lock(np
))) {
2056 FSDBG_BOT(514, np
, 0xd1e0222, 0, error
);
2060 if (np
->n_flag
& NNEEDINVALIDATE
) {
2061 np
->n_flag
&= ~NNEEDINVALIDATE
;
2062 nfs_node_unlock(np
);
2063 error
= nfs_vinvalbuf(vp
, V_SAVE
| V_IGNORE_WRITEERR
, ctx
, 1);
2065 error
= nfs_node_lock(np
);
2068 FSDBG_BOT(514, np
, 0xd1e0322, 0, error
);
2073 modified
= (np
->n_flag
& NMODIFIED
);
2074 nfs_node_unlock(np
);
2075 /* nfs_getattr() will check changed and purge caches */
2076 error
= nfs_getattr(np
, NULL
, ctx
, modified
? NGA_UNCACHED
: NGA_CACHED
);
2078 FSDBG_BOT(514, np
, 0xd1e0004, 0, error
);
2082 if (uio_resid(uio
) == 0) {
2083 FSDBG_BOT(514, np
, 0xd1e0001, 0, 0);
2086 if (uio_offset(uio
) < 0) {
2087 FSDBG_BOT(514, np
, 0xd1e0002, 0, EINVAL
);
2092 * set up readahead - which may be limited by:
2093 * + current request length (for IO_NOCACHE)
2094 * + readahead setting
2097 if (nmp
->nm_readahead
> 0) {
2098 off_t end
= uio_offset(uio
) + uio_resid(uio
);
2099 if (end
> (off_t
)np
->n_size
) {
2102 rabn
= uio_offset(uio
) / biosize
;
2103 maxrabn
= (end
- 1) / biosize
;
2104 nfs_node_lock_force(np
);
2105 if (!(ioflag
& IO_NOCACHE
) &&
2106 (!rabn
|| (rabn
== np
->n_lastread
) || (rabn
== (np
->n_lastread
+ 1)))) {
2107 maxrabn
+= nmp
->nm_readahead
;
2108 if ((maxrabn
* biosize
) >= (off_t
)np
->n_size
) {
2109 maxrabn
= ((off_t
)np
->n_size
- 1) / biosize
;
2112 if (maxrabn
< np
->n_lastrahead
) {
2113 np
->n_lastrahead
= -1;
2115 if (rabn
< np
->n_lastrahead
) {
2116 rabn
= np
->n_lastrahead
+ 1;
2118 nfs_node_unlock(np
);
2124 nfs_data_lock(np
, NFS_DATA_LOCK_SHARED
);
2125 lbn
= uio_offset(uio
) / biosize
;
2128 * Copy directly from any cached pages without grabbing the bufs.
2129 * (If we are NOCACHE and we've issued readahead requests, we need
2130 * to grab the NB_NCRDAHEAD bufs to drop them.)
2132 if ((!(ioflag
& IO_NOCACHE
) || !readaheads
) &&
2133 ((uio
->uio_segflg
== UIO_USERSPACE32
||
2134 uio
->uio_segflg
== UIO_USERSPACE64
||
2135 uio
->uio_segflg
== UIO_USERSPACE
))) {
2136 io_resid
= uio_resid(uio
);
2137 diff
= np
->n_size
- uio_offset(uio
);
2138 if (diff
< io_resid
) {
2142 int count
= (io_resid
> INT_MAX
) ? INT_MAX
: io_resid
;
2143 error
= cluster_copy_ubc_data(vp
, uio
, &count
, 0);
2145 nfs_data_unlock(np
);
2146 FSDBG_BOT(514, np
, uio_offset(uio
), 0xcacefeed, error
);
2150 /* count any biocache reads that we just copied directly */
2151 if (lbn
!= (uio_offset(uio
) / biosize
)) {
2152 OSAddAtomic64((uio_offset(uio
) / biosize
) - lbn
, &nfsstats
.biocache_reads
);
2153 FSDBG(514, np
, 0xcacefeed, uio_offset(uio
), error
);
2157 lbn
= uio_offset(uio
) / biosize
;
2158 on
= uio_offset(uio
) % biosize
;
2159 nfs_node_lock_force(np
);
2160 np
->n_lastread
= (uio_offset(uio
) - 1) / biosize
;
2161 nfs_node_unlock(np
);
2163 if ((uio_resid(uio
) <= 0) || (uio_offset(uio
) >= (off_t
)np
->n_size
)) {
2164 nfs_data_unlock(np
);
2165 FSDBG_BOT(514, np
, uio_offset(uio
), uio_resid(uio
), 0xaaaaaaaa);
2169 /* adjust readahead block number, if necessary */
2173 lastrabn
= MIN(maxrabn
, lbn
+ nmp
->nm_readahead
);
2174 if (rabn
<= lastrabn
) { /* start readaheads */
2175 error
= nfs_buf_readahead(np
, ioflag
, &rabn
, lastrabn
, thd
, cred
);
2177 nfs_data_unlock(np
);
2178 FSDBG_BOT(514, np
, 0xd1e000b, 1, error
);
2184 OSAddAtomic64(1, &nfsstats
.biocache_reads
);
2187 * If the block is in the cache and has the required data
2188 * in a valid region, just copy it out.
2189 * Otherwise, get the block and write back/read in,
2193 io_resid
= uio_resid(uio
);
2194 n
= (io_resid
> (biosize
- on
)) ? (biosize
- on
) : io_resid
;
    diff = np->n_size - uio_offset(uio);
    error = nfs_buf_get(np, lbn, biosize, thd, NBLK_READ, &bp);
    nfs_data_unlock(np);
    FSDBG_BOT(514, np, 0xd1e000c, 0, error);
    if ((ioflag & IO_NOCACHE) && ISSET(bp->nb_flags, NB_CACHE)) {
    /*
     * IO_NOCACHE found a cached buffer.
     * Flush the buffer if it's dirty.
     * Invalidate the data if it wasn't just read
     * in as part of a "nocache readahead".
     */
    if (bp->nb_dirty || (bp->nb_dirtyend > 0)) {
    /* so write the buffer out and try again */
    SET(bp->nb_flags, NB_NOCACHE);
    if (ISSET(bp->nb_flags, NB_NCRDAHEAD)) {
    CLR(bp->nb_flags, NB_NCRDAHEAD);
    SET(bp->nb_flags, NB_NOCACHE);
    /* if any pages are valid... */
    /* ...check for any invalid pages in the read range */
    int pg, firstpg, lastpg, dirtypg;
    dirtypg = firstpg = lastpg = -1;
    pg = on / PAGE_SIZE;
    while (pg <= (on + n - 1) / PAGE_SIZE) {
    if (!NBPGVALID(bp, pg)) {
    } else if (firstpg >= 0 && dirtypg < 0 && NBPGDIRTY(bp, pg)) {
    /* if there are no invalid pages, we're all set */
    if (bp->nb_validoff < 0) {
    /* valid range isn't set up, so */
    /* set it to what we know is valid */
    bp->nb_validoff = trunc_page(on);
    bp->nb_validend = round_page(on + n);
    nfs_buf_normalize_valid_range(np, bp);
    /* there are invalid pages in the read range */
    if (((dirtypg > firstpg) && (dirtypg < lastpg)) ||
        (((firstpg * PAGE_SIZE) < bp->nb_dirtyend) && (((lastpg + 1) * PAGE_SIZE) > bp->nb_dirtyoff))) {
    /* there are also dirty page(s) (or range) in the read range, */
    /* so write the buffer out and try again */
    CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
    SET(bp->nb_flags, NB_ASYNC);
    if (!IS_VALID_CRED(bp->nb_wcred)) {
    kauth_cred_ref(cred);
    bp->nb_wcred = cred;
    error = nfs_buf_write(bp);
    nfs_data_unlock(np);
    FSDBG_BOT(514, np, 0xd1e000d, 0, error);
    if (!bp->nb_dirty && bp->nb_dirtyend <= 0 &&
        (lastpg - firstpg + 1) > (biosize / PAGE_SIZE) / 2) {
    /* we need to read in more than half the buffer and the */
    /* buffer's not dirty, so just fetch the whole buffer */
    /* read the page range in */
    char uio_buf[UIO_SIZEOF(1)];
    auio = uio_createwithbuffer(1, (NBOFF(bp) + firstpg * PAGE_SIZE_64),
        UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + (firstpg * PAGE_SIZE)),
        ((lastpg - firstpg + 1) * PAGE_SIZE));
    error = nfs_read_rpc(np, auio, ctx);
    if (ioflag & IO_NOCACHE) {
    SET(bp->nb_flags, NB_NOCACHE);
    nfs_buf_release(bp, 1);
    nfs_data_unlock(np);
    FSDBG_BOT(514, np, 0xd1e000e, 0, error);
    /* Make sure that the valid range is set to cover this read. */
    bp->nb_validoff = trunc_page_32(on);
    bp->nb_validend = round_page_32(on + n);
    nfs_buf_normalize_valid_range(np, bp);
    if (uio_resid(auio) > 0) {
    /* if short read, must have hit EOF, */
    /* so zero the rest of the range */
    bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
    /* mark the pages (successfully read) as valid */
    for (pg = firstpg; pg <= lastpg; pg++) {
    NBPGVALID_SET(bp, pg);
    /* if no pages are valid, read the whole block */
    if (!bp->nb_valid) {
    if (!IS_VALID_CRED(bp->nb_rcred) && IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    bp->nb_rcred = cred;
    SET(bp->nb_flags, NB_READ);
    CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
    error = nfs_buf_read(bp);
    if (ioflag & IO_NOCACHE) {
    SET(bp->nb_flags, NB_NOCACHE);
    nfs_data_unlock(np);
    nfs_buf_release(bp, 1);
    FSDBG_BOT(514, np, 0xd1e000f, 0, error);
    /* validate read range against valid range and clip */
    if (bp->nb_validend > 0) {
    diff = (on >= bp->nb_validend) ? 0 : (bp->nb_validend - on);
    error = uiomove(bp->nb_data + on, n, uio);
    nfs_buf_release(bp, 1);
    nfs_data_unlock(np);
    nfs_node_lock_force(np);
    np->n_lastread = (uio_offset(uio) - 1) / biosize;
    nfs_node_unlock(np);
    } while (error == 0 && uio_resid(uio) > 0 && n > 0);
    FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), error);
/*
 * limit the number of outstanding async I/O writes
 */
nfs_async_write_start(struct nfsmount *nmp)
    int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
    struct timespec ts = {1, 0};

    if (nfs_max_async_writes <= 0) {
    lck_mtx_lock(&nmp->nm_lock);
    while ((nfs_max_async_writes > 0) && (nmp->nm_asyncwrites >= nfs_max_async_writes)) {
    if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1))) {
    msleep(&nmp->nm_asyncwrites, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsasyncwrites", &ts);
    nmp->nm_asyncwrites++;
    lck_mtx_unlock(&nmp->nm_lock);

nfs_async_write_done(struct nfsmount *nmp)
    if (nmp->nm_asyncwrites <= 0) {
    lck_mtx_lock(&nmp->nm_lock);
    if (nmp->nm_asyncwrites-- >= nfs_max_async_writes) {
    wakeup(&nmp->nm_asyncwrites);
    lck_mtx_unlock(&nmp->nm_lock);
/*
 * write (or commit) the given NFS buffer
 *
 * Commit the buffer if we can.
 * Write out any dirty range.
 * If any dirty pages remain, write them out.
 *
 * For async requests, all the work beyond sending the initial
 * write RPC is handled in the RPC callback(s).
 */
nfs_buf_write(struct nfsbuf *bp)
    int error = 0, oldflags, async;
    proc_t p = current_proc();
    int iomode, doff, dend, firstpg, lastpg;

    FSDBG_TOP(553, bp, NBOFF(bp), bp->nb_flags, 0);

    if (!ISSET(bp->nb_lflags, NBL_BUSY)) {
    panic("nfs_buf_write: buffer is not busy???");

    async = ISSET(bp->nb_flags, NB_ASYNC);
    oldflags = bp->nb_flags;

    CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
    if (ISSET(oldflags, NB_DELWRI)) {
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeup(&nfs_nbdwrite);

    /* move to clean list */
    if (ISSET(oldflags, (NB_ASYNC | NB_DELWRI))) {
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);

    nfs_node_lock_force(np);
    nfs_node_unlock(np);
    vnode_startwrite(NFSTOV(np));

    if (p && p->p_stats) {
    OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);

    cred = bp->nb_wcred;
    if (!IS_VALID_CRED(cred) && ISSET(bp->nb_flags, NB_READ)) {
    cred = bp->nb_rcred; /* shouldn't really happen, but... */
    if (IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    thd = async ? NULL : current_thread();

    /* We need to make sure the pages are locked before doing I/O. */
    if (!ISSET(bp->nb_flags, NB_META)) {
    if (UBCINFOEXISTS(NFSTOV(np))) {
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    error = nfs_buf_upl_setup(bp);
    printf("nfs_buf_write: upl create failed %d\n", error);
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;
    nfs_buf_upl_check(bp);
    /* We should never be in nfs_buf_write() with no UBCINFO. */
    printf("nfs_buf_write: ubcinfo already gone\n");
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;

    /* If NB_NEEDCOMMIT is set, a commit RPC may do the trick. */
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    struct nfsmount *nmp = NFSTONMP(np);
    if (nfs_mount_gone(nmp)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;
    SET(bp->nb_flags, NB_WRITEINPROG);
    error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp) + bp->nb_dirtyoff,
        bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred, bp->nb_verf);
    CLR(bp->nb_flags, NB_WRITEINPROG);
    if (error != NFSERR_STALEWRITEVERF) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    CLR(bp->nb_flags, NB_NEEDCOMMIT);
    nfs_node_lock_force(np);
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);

    if (!error && (bp->nb_dirtyend > 0)) {
    /* sanity check the dirty range */
    if (NBOFF(bp) + bp->nb_dirtyend > (off_t) np->n_size) {
    bp->nb_dirtyend = np->n_size - NBOFF(bp);
    if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;

    if (!error && (bp->nb_dirtyend > 0)) {
    /* there's a dirty range that needs to be written out */
    doff = bp->nb_dirtyoff;
    dend = bp->nb_dirtyend;

    /* if doff page is dirty, move doff to start of page */
    if (NBPGDIRTY(bp, doff / PAGE_SIZE)) {
    doff -= doff & PAGE_MASK;
    /* try to expand write range to include preceding dirty pages */
    if (!(doff & PAGE_MASK)) {
    while ((doff > 0) && NBPGDIRTY(bp, (doff - 1) / PAGE_SIZE)) {
    /* if dend page is dirty, move dend to start of next page */
    if ((dend & PAGE_MASK) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
    dend = round_page_32(dend);
    /* try to expand write range to include trailing dirty pages */
    if (!(dend & PAGE_MASK)) {
    while ((dend < (int)bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE)) {
    /* make sure to keep dend clipped to EOF */
    if ((NBOFF(bp) + dend) > (off_t) np->n_size) {
    dend = np->n_size - NBOFF(bp);
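    /*
     * Illustrative note (not from the original source), assuming 4KB pages:
     * with a dirty range of [doff = 6000, dend = 9100), if the page holding
     * doff (page 1) is dirty, doff is pulled back to 4096; if the page
     * holding dend (page 2) is dirty, dend is rounded up to 12288.  The
     * expansion loops may then pull in further whole dirty pages on either
     * side, and dend is finally clipped so the write never extends past EOF.
     */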
    /* calculate range of complete pages being written */
    firstpg = round_page_32(doff) / PAGE_SIZE;
    lastpg = (trunc_page_32(dend) - 1) / PAGE_SIZE;
    /* calculate mask for that page range */
    pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
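    /*
     * Illustrative note (not from the original source): the mask has one bit
     * per page, with bits firstpg..lastpg set.  For example, firstpg = 1 and
     * lastpg = 3 gives ((1 << 4) - 1) & ~((1 << 1) - 1) = 0xf & ~0x1 = 0xe,
     * i.e. pages 1, 2 and 3 of the buffer.
     */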
    /*
     * compare page mask to nb_dirty; if there are other dirty pages
     * then write FILESYNC; otherwise, write UNSTABLE if async and
     * not needcommit/stable; otherwise write FILESYNC
     */
    if (bp->nb_dirty & ~pagemask) {
    iomode = NFS_WRITE_FILESYNC;
    } else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_STABLE)) == NB_ASYNC) {
    iomode = NFS_WRITE_UNSTABLE;
    iomode = NFS_WRITE_FILESYNC;

    /* write the whole contiguous dirty range */
    bp->nb_offio = doff;
    bp->nb_endio = dend;

    OSAddAtomic64(1, &nfsstats.write_bios);

    SET(bp->nb_flags, NB_WRITEINPROG);
    error = nfs_buf_write_rpc(bp, iomode, thd, cred);
    /*
     * For async I/O, the callbacks will finish up the
     * write and push out any dirty pages. Otherwise,
     * the write has already been finished and any dirty
     */
    if (!error && bp->nb_dirty) { /* write out any dirty pages */
    error = nfs_buf_write_dirty_pages(bp, thd, cred);

    /* note: bp is still valid only for !async case */
    error = nfs_buf_iowait(bp);
    /* move to clean list */
    if (oldflags & NB_DELWRI) {
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, error);
    nfs_buf_release(bp, 1);
    /* check if we need to invalidate (and we can) */
    if ((np->n_flag & NNEEDINVALIDATE) &&
        !(np->n_bflag & (NBINVALINPROG | NBFLUSHINPROG))) {
    nfs_node_lock_force(np);
    if (np->n_flag & NNEEDINVALIDATE) {
    np->n_flag &= ~NNEEDINVALIDATE;
    nfs_node_unlock(np);
    /*
     * There was a write error and we need to
     * invalidate attrs and flush buffers in
     * order to sync up with the server.
     * (if this write was extending the file,
     * we may no longer know the correct size)
     *
     * But we couldn't call vinvalbuf while holding
     * the buffer busy. So we call vinvalbuf() after
     * releasing the buffer.
     */
    nfs_vinvalbuf2(NFSTOV(np), V_SAVE | V_IGNORE_WRITEERR, thd, cred, 1);

    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
/*
 * finish the writing of a buffer
 */
nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
    nfsnode_t np = bp->nb_np;
    int error = (bp->nb_flags & NB_ERROR) ? bp->nb_error : 0;
    int firstpg, lastpg;

    if ((error == EINTR) || (error == ERESTART)) {
    CLR(bp->nb_flags, NB_ERROR);
    SET(bp->nb_flags, NB_EINTR);

    /* calculate range of complete pages being written */
    firstpg = round_page_32(bp->nb_offio) / PAGE_SIZE;
    lastpg = (trunc_page_32(bp->nb_endio) - 1) / PAGE_SIZE;
    /* calculate mask for that page range written */
    pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
    /* clear dirty bits for pages we've written */
    bp->nb_dirty &= ~pagemask;

    /* manage needcommit state */
    if (!error && (bp->nb_commitlevel == NFS_WRITE_UNSTABLE)) {
    if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_node_lock_force(np);
    np->n_needcommitcnt++;
    nfs_node_unlock(np);
    SET(bp->nb_flags, NB_NEEDCOMMIT);
    /* make sure nb_dirtyoff/nb_dirtyend reflect actual range written */
    bp->nb_dirtyoff = bp->nb_offio;
    bp->nb_dirtyend = bp->nb_endio;
    } else if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_node_lock_force(np);
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);
    CLR(bp->nb_flags, NB_NEEDCOMMIT);

    CLR(bp->nb_flags, NB_WRITEINPROG);

    /*
     * For an unstable write, the buffer is still treated as dirty until
     * a commit (or stable (re)write) is performed. Buffers needing only
     * a commit are marked with the NB_DELWRI and NB_NEEDCOMMIT flags.
     *
     * If the write was interrupted we set NB_EINTR. Don't set NB_ERROR
     * because that would cause the buffer to be dropped. The buffer is
     * still valid and simply needs to be written again.
     */
    if ((error == EINTR) || (error == ERESTART) || (!error && (bp->nb_flags & NB_NEEDCOMMIT))) {
    CLR(bp->nb_flags, NB_INVAL);
    if (!ISSET(bp->nb_flags, NB_DELWRI)) {
    SET(bp->nb_flags, NB_DELWRI);
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    /*
     * Since for the NB_ASYNC case, we've reassigned the buffer to the
     * clean list, we have to reassign it back to the dirty one. Ugh.
     */
    if (ISSET(bp->nb_flags, NB_ASYNC)) {
    /* move to dirty list */
    lck_mtx_lock(nfs_buf_mutex);
    if (bp->nb_vnbufs.le_next != NFSNOLIST) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    /* either there's an error or we don't need to commit */
    /*
     * There was a write error and we need to invalidate
     * attrs and flush buffers in order to sync up with the
     * server. (if this write was extending the file, we
     * may no longer know the correct size)
     *
     * But we can't call vinvalbuf while holding this
     * buffer busy. Set a flag to do it after releasing
     */
    nfs_node_lock_force(np);
    np->n_error = error;
    np->n_flag |= (NWRITEERR | NNEEDINVALIDATE);
    NATTRINVALIDATE(np);
    nfs_node_unlock(np);
    /* clear the dirty range */
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;

    if (!error && bp->nb_dirty) {
    nfs_buf_write_dirty_pages(bp, thd, cred);
/*
 * write out any pages marked dirty in a buffer
 *
 * We do use unstable writes and follow up with a commit.
 * If we catch the write verifier changing we'll restart
 * do the writes filesync.
 */
nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
    nfsnode_t np = bp->nb_np;
    struct nfsmount *nmp = NFSTONMP(np);
    int error = 0, commit, iomode, iomode2, len, pg, count, npages, off;
    uint32_t dirty = bp->nb_dirty;
    char uio_buf[UIO_SIZEOF(1)];

    if (!bp->nb_dirty) {

    /* there are pages marked dirty that need to be written out */
    OSAddAtomic64(1, &nfsstats.write_bios);
    SET(bp->nb_flags, NB_WRITEINPROG);
    npages = bp->nb_bufsize / PAGE_SIZE;
    iomode = NFS_WRITE_UNSTABLE;

    auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
        &uio_buf, sizeof(uio_buf));

    dirty = bp->nb_dirty;
    wverf = bp->nb_verf;
    commit = NFS_WRITE_FILESYNC;
    for (pg = 0; pg < npages; pg++) {
    if (!NBPGDIRTY(bp, pg)) {
    while (((pg + count) < npages) && NBPGDIRTY(bp, pg + count)) {
    /* write count pages starting with page pg */
    off = pg * PAGE_SIZE;
    len = count * PAGE_SIZE;
    /* clip writes to EOF */
    if (NBOFF(bp) + off + len > (off_t) np->n_size) {
    len -= (NBOFF(bp) + off + len) - np->n_size;
    uio_reset(auio, NBOFF(bp) + off, UIO_SYSSPACE, UIO_WRITE);
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + off), len);
    error = nfs_write_rpc2(np, auio, thd, cred, &iomode2, &bp->nb_verf);
    if (iomode2 < commit) { /* Retain the lowest commitment level returned. */
    if ((commit != NFS_WRITE_FILESYNC) && (wverf != bp->nb_verf)) {
    /* verifier changed, redo all the writes filesync */
    iomode = NFS_WRITE_FILESYNC;
    /* clear dirty bits */
    dirty &= ~(1 << pg);
    if (count) { /* leave pg on last page */
    CLR(bp->nb_flags, NB_WRITEINPROG);

    if (!error && (commit != NFS_WRITE_FILESYNC)) {
    error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred, wverf);
    if (error == NFSERR_STALEWRITEVERF) {
    /* verifier changed, so we need to restart all the writes */
    iomode = NFS_WRITE_FILESYNC;
    bp->nb_dirty = dirty;
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
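    /*
     * Illustrative note (not from the original source): the commitment levels
     * compare numerically in the protocol's stable_how ordering (UNSTABLE = 0,
     * DATA_SYNC = 1, FILE_SYNC = 2 per RFC 1813), so keeping the smallest
     * iomode2 returned above tracks the weakest guarantee any of the page
     * writes received; only if every write came back NFS_WRITE_FILESYNC can
     * the follow-up commit RPC be skipped.
     */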
/*
 * initiate the NFS WRITE RPC(s) for a buffer
 */
nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred)
    struct nfsmount *nmp;
    nfsnode_t np = bp->nb_np;
    int error = 0, nfsvers, async;
    uint32_t nmwsize, length, len;
    struct nfsreq_cbinfo cb;
    char uio_buf[UIO_SIZEOF(1)];

    if (nfs_mount_gone(nmp)) {
    bp->nb_error = error = ENXIO;
    SET(bp->nb_flags, NB_ERROR);
    nfsvers = nmp->nm_vers;
    nmwsize = nmp->nm_wsize;

    offset = bp->nb_offio;
    length = bp->nb_endio - bp->nb_offio;

    /* Note: Can only do async I/O if nfsiods are configured. */
    async = (bp->nb_flags & NB_ASYNC) && (NFSIOD_MAX > 0);
    bp->nb_commitlevel = NFS_WRITE_FILESYNC;
    cb.rcb_func = async ? nfs_buf_write_rpc_finish : NULL;

    if ((nfsvers == NFS_VER2) && ((NBOFF(bp) + bp->nb_endio) > 0xffffffffLL)) {
    bp->nb_error = error = EFBIG;
    SET(bp->nb_flags, NB_ERROR);

    auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
        UIO_WRITE, &uio_buf, sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);

    bp->nb_rpcs = nrpcs = (length + nmwsize - 1) / nmwsize;
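    /*
     * Illustrative note (not from the original source): the dirty range is
     * split into ceil(length / nm_wsize) WRITE RPCs.  For example, a 28KB
     * (28672-byte) range with a negotiated write size of 8KB gives
     * nrpcs = (28672 + 8191) / 8192 = 4, i.e. three 8KB RPCs plus one 4KB RPC.
     */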
    if (async && (nrpcs > 1)) {
    SET(bp->nb_flags, NB_MULTASYNCRPC);
    CLR(bp->nb_flags, NB_MULTASYNCRPC);

    while (length > 0) {
    if (ISSET(bp->nb_flags, NB_ERROR)) {
    error = bp->nb_error;
    len = (length > nmwsize) ? nmwsize : length;
    cb.rcb_args[0] = offset;
    cb.rcb_args[1] = len;
    if (nmp->nm_vers >= NFS_VER4) {
    cb.rcb_args[2] = nmp->nm_stategenid;
    if (async && ((error = nfs_async_write_start(nmp)))) {
    error = nmp->nm_funcs->nf_write_rpc_async(np, auio, len, thd, cred,
    nfs_async_write_done(nmp);
    nfs_buf_write_rpc_finish(req);

    /*
     * Something bad happened while trying to send the RPCs.
     * Wait for any outstanding requests to complete.
     */
    bp->nb_error = error;
    SET(bp->nb_flags, NB_ERROR);
    if (ISSET(bp->nb_flags, NB_MULTASYNCRPC)) {
    nrpcs = (length + nmwsize - 1) / nmwsize;
    lck_mtx_lock(nfs_buf_mutex);
    bp->nb_rpcs -= nrpcs;
    if (bp->nb_rpcs == 0) {
    /* No RPCs left, so the buffer's done */
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_write_finish(bp, thd, cred);
    /* wait for the last RPC to mark it done */
    while (bp->nb_rpcs > 0) {
    msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
        "nfs_buf_write_rpc_cancel", NULL);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_write_finish(bp, thd, cred);
    /* It may have just been an interrupt... that's OK */
    if (!ISSET(bp->nb_flags, NB_ERROR)) {
/*
 * finish up an NFS WRITE RPC on a buffer
 */
nfs_buf_write_rpc_finish(struct nfsreq *req)
    int error = 0, nfsvers, offset, length, multasyncrpc, finished;
    int committed = NFS_WRITE_FILESYNC;
    void *wakeme = NULL;
    struct nfsreq_cbinfo cb;
    struct nfsreq *wreq = NULL;
    struct nfsmount *nmp;
    char uio_buf[UIO_SIZEOF(1)];

    thd = req->r_thread;
    if (IS_VALID_CRED(cred)) {
    kauth_cred_ref(cred);
    cb = req->r_callback;
    if (cb.rcb_func) { /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
    nfs_request_ref(req, 0);

    if (nfs_mount_gone(nmp)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = ENXIO;
    if (error || ISSET(bp->nb_flags, NB_ERROR)) {
    nfs_request_async_cancel(req);
    nfsvers = nmp->nm_vers;

    offset = cb.rcb_args[0];
    rlen = length = cb.rcb_args[1];

    /* finish the RPC */
    error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &committed, &rlen, &wverf);
    if ((error == EINPROGRESS) && cb.rcb_func) {
    /* async request restarted */
    nfs_request_rele(req);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);

    if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
    lck_mtx_lock(&nmp->nm_lock);
    if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid)) {
    NP(np, "nfs_buf_write_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
        error, NBOFF(bp) + offset, cb.rcb_args[2], nmp->nm_stategenid);
    nfs_need_recover(nmp, error);
    lck_mtx_unlock(&nmp->nm_lock);
    if (np->n_flag & NREVOKE) {
    if (error == NFSERR_GRACE) {
    /*
     * For an async I/O request, handle a grace delay just like
     * jukebox errors. Set the resend time and queue it up.
     */
    if (req->r_nmrep.nmc_mhead) {
    mbuf_freem(req->r_nmrep.nmc_mhead);
    req->r_nmrep.nmc_mhead = NULL;
    lck_mtx_lock(&req->r_mtx);
    req->r_resendtime = now.tv_sec + 2;
    req->r_xid = 0; // get a new XID
    req->r_flags |= R_RESTART;
    nfs_asyncio_resend(req);
    lck_mtx_unlock(&req->r_mtx);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
    /* Note: nfsreq reference taken will be dropped later when finished */
    /* otherwise, just pause a couple seconds and retry */
    tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
    if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;

    if (error || (nfsvers == NFS_VER2)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error = EIO;

    /* save lowest commit level returned */
    if (committed < bp->nb_commitlevel) {
    bp->nb_commitlevel = committed;

    /* check the write verifier */
    bp->nb_verf = wverf;
    } else if (bp->nb_verf != wverf) {
    /* verifier changed, so buffer will need to be rewritten */
    bp->nb_flags |= NB_STALEWVERF;
    bp->nb_commitlevel = NFS_WRITE_UNSTABLE;
    bp->nb_verf = wverf;

    /*
     * check for a short write
     *
     * If the server didn't write all the data, then we
     * need to issue another write for the rest of it.
     * (Don't bother if the buffer hit an error or stale wverf.)
     */
    if (((int)rlen < length) && !(bp->nb_flags & (NB_STALEWVERF | NB_ERROR))) {
    auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE,
        UIO_WRITE, &uio_buf, sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length);
    cb.rcb_args[0] = offset;
    cb.rcb_args[1] = length;
    if (nmp->nm_vers >= NFS_VER4) {
    cb.rcb_args[2] = nmp->nm_stategenid;
    // XXX iomode should really match the original request
    error = nmp->nm_funcs->nf_write_rpc_async(np, auio, length, thd, cred,
        NFS_WRITE_FILESYNC, &cb, &wreq);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
    /* if !async we'll need to wait for this RPC to finish */
    nfs_request_rele(req);
    /*
     * Outstanding RPC count is unchanged.
     * Callback will be called when RPC is done.
     */
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;
    nfs_async_write_done(nmp);
    nfs_request_rele(req);

    /*
     * Decrement outstanding RPC count on buffer
     * and call nfs_buf_write_finish on last RPC.
     *
     * (Note: when there are multiple async RPCs issued for a
     * buffer we need nfs_buffer_mutex to avoid problems when
     * aborting a partially-initiated set of RPCs)
     */
    multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
    lck_mtx_lock(nfs_buf_mutex);
    finished = (bp->nb_rpcs == 0);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeme = &bp->nb_rpcs;
    nfs_buf_write_finish(bp, thd, cred);
    if (IS_VALID_CRED(cred)) {
    kauth_cred_unref(&cred);
/*
 * Send commit(s) for the given node's "needcommit" buffers
 */
nfs_flushcommits(nfsnode_t np, int nowait)
    struct nfsmount *nmp;
    struct nfsbuf *bp, *prevlbp, *lbp;
    struct nfsbuflists blist, commitlist;
    int error = 0, retv, wcred_set, flags, dirty;
    u_quad_t off, endoff, toff;
    kauth_cred_t wcred = NULL;

    FSDBG_TOP(557, np, 0, 0, 0);

    /*
     * A nb_flags == (NB_DELWRI | NB_NEEDCOMMIT) block has been written to the
     * server, but has not been committed to stable storage on the server
     * yet. The byte range is worked out for as many nfsbufs as we can handle
     * and the commit rpc is done.
     */
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    error = nfs_node_lock(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);

    LIST_INIT(&commitlist);

    if (nfs_mount_gone(nmp)) {
    if (nmp->nm_vers == NFS_VER2) {
    flags |= NBI_NOWAIT;
    lck_mtx_lock(nfs_buf_mutex);
    wverf = nmp->nm_verf;
    if (!nfs_buf_iterprepare(np, &blist, flags)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    error = nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0);
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) != (NB_DELWRI | NB_NEEDCOMMIT)) ||
        (bp->nb_verf != wverf)) {
    nfs_buf_remfree(bp);

    /* buffer UPLs will be grabbed *in order* below */

    FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty);
    FSDBG(557, bp->nb_validoff, bp->nb_validend,
        bp->nb_dirtyoff, bp->nb_dirtyend);

    /*
     * Work out if all buffers are using the same cred
     * so we can deal with them all with one commit.
     *
     * Note: creds in bp's must be obtained by kauth_cred_ref
     * on the same original cred in order for them to be equal.
     */
    if (wcred_set == 0) {
    wcred = bp->nb_wcred;
    if (!IS_VALID_CRED(wcred)) {
    panic("nfs: needcommit w/out wcred");
    } else if ((wcred_set == 1) && wcred != bp->nb_wcred) {
    SET(bp->nb_flags, NB_WRITEINPROG);

    /*
     * Add this buffer to the list of buffers we are committing.
     * Buffers are inserted into the list in ascending order so that
     * we can take the UPLs in order after the list is complete.
     */
    LIST_FOREACH(lbp, &commitlist, nb_vnbufs) {
    if (bp->nb_lblkno < lbp->nb_lblkno) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_AFTER(prevlbp, bp, nb_vnbufs);
    LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs);

    /* update commit range start, end */
    toff = NBOFF(bp) + bp->nb_dirtyoff;
    toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff);
    if (toff > endoff) {
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);

    if (LIST_EMPTY(&commitlist)) {

    /*
     * We need a UPL to prevent others from accessing the buffers during
     * our commit RPC(s).
     *
     * We used to also check for dirty pages here; if there were any we'd
     * abort the commit and force the entire buffer to be written again.
     * Instead of doing that, we just go ahead and commit the dirty range,
     * and then leave the buffer around with dirty pages that will be
     * written out later.
     */
    LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    retv = nfs_buf_upl_setup(bp);
    /* Unable to create the UPL, the VM object probably no longer exists. */
    printf("nfs_flushcommits: upl create failed %d\n", retv);
    bp->nb_valid = bp->nb_dirty = 0;
    nfs_buf_upl_check(bp);

    /*
     * Commit data on the server, as required.
     * If all bufs are using the same wcred, then use that with
     * one call for all of them, otherwise commit each one
     */
    if (wcred_set == 1) {
    /*
     * Note, it's possible the commit range could be >2^32-1.
     * If it is, we'll send one commit that covers the whole file.
     */
    if ((endoff - off) > 0xffffffff) {
    count = (endoff - off);
    retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred, wverf);
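    /*
     * Illustrative note (not from the original source): in the NFSv3 COMMIT
     * arguments a count of zero means "commit everything from the given
     * offset to the end of the file" (RFC 1813), which is presumably how a
     * single commit can cover a range wider than a 32-bit count when
     * (endoff - off) overflows, as the comment above describes.
     */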
    LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
    toff = NBOFF(bp) + bp->nb_dirtyoff;
    count = bp->nb_dirtyend - bp->nb_dirtyoff;
    retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred, wverf);

    /*
     * Now, either mark the blocks I/O done or mark the
     * blocks dirty, depending on whether the commit
     */
    while ((bp = LIST_FIRST(&commitlist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty);
    nfs_node_lock_force(np);
    CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG));
    np->n_needcommitcnt--;
    CHECK_NEEDCOMMITCNT(np);
    nfs_node_unlock(np);

    /* move back to dirty list */
    lck_mtx_lock(nfs_buf_mutex);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_release(bp, 1);

    nfs_node_lock_force(np);
    nfs_node_unlock(np);
    vnode_startwrite(NFSTOV(np));
    if (ISSET(bp->nb_flags, NB_DELWRI)) {
    lck_mtx_lock(nfs_buf_mutex);
    lck_mtx_unlock(nfs_buf_mutex);
    wakeup(&nfs_nbdwrite);
    CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
    /* if block still has dirty pages, we don't want it to */
    /* be released in nfs_buf_iodone(). So, don't set NB_ASYNC. */
    if (!(dirty = bp->nb_dirty)) {
    SET(bp->nb_flags, NB_ASYNC);
    CLR(bp->nb_flags, NB_ASYNC);

    /* move to clean list */
    lck_mtx_lock(nfs_buf_mutex);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    lck_mtx_unlock(nfs_buf_mutex);

    bp->nb_dirtyoff = bp->nb_dirtyend = 0;

    /* throw it back in as a delayed write buffer */
    CLR(bp->nb_flags, NB_DONE);
    nfs_buf_write_delayed(bp);

    FSDBG_BOT(557, np, 0, 0, error);
/*
 * Flush all the blocks associated with a vnode.
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 */
nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr)
    struct nfsbuflists blist;
    struct nfsmount *nmp = NFSTONMP(np);
    int error = 0, error2, slptimeo = 0, slpflag = 0;
    int nfsvers, flags, passone = 1;

    FSDBG_TOP(517, np, waitfor, ignore_writeerr, 0);

    if (nfs_mount_gone(nmp)) {
    nfsvers = nmp->nm_vers;
    if (NMFLAG(nmp, INTR)) {

    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);

    lck_mtx_lock(nfs_buf_mutex);
    while (np->n_bflag & NBFLUSHINPROG) {
    np->n_bflag |= NBFLUSHWANT;
    error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
    if ((error && (error != EWOULDBLOCK)) ||
        ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))) {
    lck_mtx_unlock(nfs_buf_mutex);
    np->n_bflag |= NBFLUSHINPROG;

    /*
     * On the first pass, start async/unstable writes on all
     * delayed write buffers. Then wait for all writes to complete
     * and call nfs_flushcommits() to commit any uncommitted buffers.
     * On all subsequent passes, start STABLE writes on any remaining
     * dirty buffers. Then wait for all writes to complete.
     */
    FSDBG(518, LIST_FIRST(&np->n_dirtyblkhd), np->n_flag, 0, 0);
    if (!NFSTONMP(np)) {
    lck_mtx_unlock(nfs_buf_mutex);

    /* Start/do any write(s) that are required. */
    if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    flags = (passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) ? NBAC_NOWAIT : 0;
    if (flags != NBAC_NOWAIT) {
    while ((error = nfs_buf_acquire(bp, flags, slpflag, slptimeo))) {
    FSDBG(524, bp, flags, bp->nb_lflags, bp->nb_flags);
    if (error == EBUSY) {
    error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
    if (flags != NBAC_NOWAIT) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    if (slpflag == PCATCH) {
    if (flags != NBAC_NOWAIT) {
    nfs_buf_refrele(bp);
    if (error == EBUSY) {
    /* buffer is no longer valid */
    if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_check_write_verifier(np, bp);
    if (!ISSET(bp->nb_flags, NB_DELWRI)) {
    /* buffer is no longer dirty */
    FSDBG(525, bp, passone, bp->nb_lflags, bp->nb_flags);
    if ((passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) &&
        ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
    nfs_buf_remfree(bp);
    lck_mtx_unlock(nfs_buf_mutex);
    if (ISSET(bp->nb_flags, NB_ERROR)) {
    nfs_node_lock_force(np);
    np->n_error = bp->nb_error ? bp->nb_error : EIO;
    np->n_flag |= NWRITEERR;
    nfs_node_unlock(np);
    nfs_buf_release(bp, 1);
    lck_mtx_lock(nfs_buf_mutex);
    SET(bp->nb_flags, NB_ASYNC);
    /* NB_STABLE forces this to be written FILESYNC */
    SET(bp->nb_flags, NB_STABLE);
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);

    if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
    while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) {
    error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0);
    if (slpflag == PCATCH) {

    if (nfsvers != NFS_VER2) {
    /* loop while it looks like there are still buffers to be */
    /* committed and nfs_flushcommits() seems to be handling them. */
    while (np->n_needcommitcnt) {
    if (nfs_flushcommits(np, 0)) {

    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);

    if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    nfs_node_lock_force(np);
    np->n_flag |= NMODIFIED;
    nfs_node_unlock(np);
    lck_mtx_lock(nfs_buf_mutex);
    if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_node_lock_force(np);
    /*
     * OK, it looks like there are no dirty blocks. If we have no
     * writes in flight and no one in the write code, we can clear
     * the modified flag. In order to make sure we see the latest
     * attributes and size, we also invalidate the attributes and
     * advance the attribute cache XID to guarantee that attributes
     * newer than our clearing of NMODIFIED will get loaded next.
     * (If we don't do this, it's possible for the flush's final
     * write/commit (xid1) to be executed in parallel with a subsequent
     * getattr request (xid2). The getattr could return attributes
     * from *before* the write/commit completed but the stale attributes
     * would be preferred because of the xid ordering.)
     */
    if (!np->n_wrbusy && !np->n_numoutput) {
    np->n_flag &= ~NMODIFIED;
    NATTRINVALIDATE(np);
    nfs_get_xid(&np->n_xid);
    nfs_node_lock_force(np);

    FSDBG(526, np->n_flag, np->n_error, 0, 0);
    if (!ignore_writeerr && (np->n_flag & NWRITEERR)) {
    error = np->n_error;
    np->n_flag &= ~NWRITEERR;
    nfs_node_unlock(np);

    lck_mtx_lock(nfs_buf_mutex);
    flags = np->n_bflag;
    np->n_bflag &= ~(NBFLUSHINPROG | NBFLUSHWANT);
    lck_mtx_unlock(nfs_buf_mutex);
    if (flags & NBFLUSHWANT) {
    wakeup(&np->n_bflag);

    FSDBG_BOT(517, np, error, ignore_writeerr, 0);
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
nfs_vinvalbuf_internal(nfsnode_t np, int flags, thread_t thd, kauth_cred_t cred, int slpflag, int slptimeo)
    struct nfsbuflists blist;
    int list, error = 0;

    if (flags & V_SAVE) {
    if ((error = nfs_flush(np, MNT_WAIT, thd, (flags & V_IGNORE_WRITEERR)))) {

    lck_mtx_lock(nfs_buf_mutex);
    if (nfs_buf_iterprepare(np, &blist, list)) {
    if (nfs_buf_iterprepare(np, &blist, list)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    if (list == NBI_CLEAN) {
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, NBAC_REMOVE, slpflag, slptimeo))) {
    FSDBG(556, np, bp, NBOFF(bp), bp->nb_flags);
    if (error != EAGAIN) {
    FSDBG(554, np, bp, -1, error);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, list);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    FSDBG(554, np, bp, NBOFF(bp), bp->nb_flags);
    lck_mtx_unlock(nfs_buf_mutex);
    if ((flags & V_SAVE) && UBCINFOEXISTS(NFSTOV(np)) && bp->nb_np &&
        (NBOFF(bp) < (off_t)np->n_size)) {
    /* extra paranoia: make sure we're not */
    /* somehow leaving any dirty data around */
    int end = (NBOFF(bp) + bp->nb_bufsize > (off_t)np->n_size) ?
        ((off_t)np->n_size - NBOFF(bp)) : bp->nb_bufsize;
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    error = nfs_buf_upl_setup(bp);
    if (error == EINVAL) {
    /* vm object must no longer exist */
    /* hopefully we don't need to do */
    /* anything for this buffer */
    printf("nfs_vinvalbuf: upl setup failed %d\n", error);
    bp->nb_valid = bp->nb_dirty = 0;
    nfs_buf_upl_check(bp);
    /* check for any dirty data before the EOF */
    if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end)) {
    /* clip dirty range to EOF */
    if (bp->nb_dirtyend > end) {
    bp->nb_dirtyend = end;
    if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
    if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < end)) {
    bp->nb_dirty &= (1 << (round_page_32(end) / PAGE_SIZE)) - 1;
    /* also make sure we'll have a credential to do the write */
    if (mustwrite && !IS_VALID_CRED(bp->nb_wcred) && !IS_VALID_CRED(cred)) {
    printf("nfs_vinvalbuf: found dirty buffer with no write creds\n");
    FSDBG(554, np, bp, 0xd00dee, bp->nb_flags);
    if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
    panic("nfs_vinvalbuf: dirty buffer without upl");
    /* gotta write out dirty data before invalidating */
    /* (NB_STABLE indicates that data writes should be FILESYNC) */
    /* (NB_NOCACHE indicates buffer should be discarded) */
    CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC));
    SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
    if (!IS_VALID_CRED(bp->nb_wcred)) {
    kauth_cred_ref(cred);
    bp->nb_wcred = cred;
    error = nfs_buf_write(bp);
    // Note: bp has been released
    FSDBG(554, bp, 0xd00dee, 0xbad, error);
    nfs_node_lock_force(np);
    if ((error != EINTR) && (error != ERESTART)) {
    np->n_error = error;
    np->n_flag |= NWRITEERR;
    /*
     * There was a write error and we need to
     * invalidate attrs to sync with server.
     * (if this write was extending the file,
     * we may no longer know the correct size)
     */
    NATTRINVALIDATE(np);
    nfs_node_unlock(np);
    if ((error == EINTR) || (error == ERESTART)) {
    /*
     * Abort on EINTR. If we don't, we could
     * be stuck in this loop forever because
     * the buffer will continue to stay dirty.
     */
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, list);
    lck_mtx_unlock(nfs_buf_mutex);
    lck_mtx_lock(nfs_buf_mutex);
    SET(bp->nb_flags, NB_INVAL);
    // hold off on FREEUPs until we're done here
    nfs_buf_release(bp, 0);
    lck_mtx_lock(nfs_buf_mutex);
    nfs_buf_itercomplete(np, &blist, list);
    if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd)) {
    panic("nfs_vinvalbuf: flush/inval failed");
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_node_lock_force(np);
    if (!(flags & V_SAVE)) {
    np->n_flag &= ~NMODIFIED;
    if (vnode_vtype(NFSTOV(np)) == VREG) {
    np->n_lastrahead = -1;
    nfs_node_unlock(np);
/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
    return nfs_vinvalbuf2(vp, flags, vfs_context_thread(ctx), vfs_context_ucred(ctx), intrflg);

nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrflg)
    nfsnode_t np = VTONFS(vp);
    struct nfsmount *nmp = VTONMP(vp);
    int error, slpflag, slptimeo, nflags, retry = 0;
    int ubcflags = UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE;
    struct timespec ts = { 2, 0 };

    FSDBG_TOP(554, np, flags, intrflg, 0);

    /*
     * If the mount is gone there's no sense trying to write anything
     * and hanging while trying to do IO.
     */
    if (nfs_mount_gone(nmp)) {
    ubcflags &= ~UBC_PUSHALL;
    if (nmp && !NMFLAG(nmp, INTR)) {

    /* First wait for any other process doing a flush to complete. */
    lck_mtx_lock(nfs_buf_mutex);
    while (np->n_bflag & NBINVALINPROG) {
    np->n_bflag |= NBINVALWANT;
    msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
    if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
    lck_mtx_unlock(nfs_buf_mutex);
    if (np->n_bflag & NBINVALINPROG) {
    np->n_bflag |= NBINVALINPROG;
    lck_mtx_unlock(nfs_buf_mutex);

    /* Now, flush as required. */
    error = nfs_vinvalbuf_internal(np, flags, thd, cred, slpflag, 0);
    FSDBG(554, np, 0, 0, error);
    if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
    error = nfs_vinvalbuf_internal(np, flags, thd, cred, 0, slptimeo);

    /* get the pages out of vm also */
    if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) {
    if ((error = ubc_msync(vp, 0, size, NULL, ubcflags))) {
    if (error == EINVAL) {
    panic("nfs_vinvalbuf(): ubc_msync failed!, error %d", error);
    if (retry++ < 10) { /* retry invalidating a few times */
    if (retry > 1 || error == ENXIO) {
    ubcflags &= ~UBC_PUSHALL;
    printf("nfs_vinvalbuf(): ubc_msync failed!, error %d\n", error);

    lck_mtx_lock(nfs_buf_mutex);
    nflags = np->n_bflag;
    np->n_bflag &= ~(NBINVALINPROG | NBINVALWANT);
    lck_mtx_unlock(nfs_buf_mutex);
    if (nflags & NBINVALWANT) {
    wakeup(&np->n_bflag);

    FSDBG_BOT(554, np, flags, intrflg, error);
/*
 * Wait for any busy buffers to complete.
 */
nfs_wait_bufs(nfsnode_t np)
    struct nfsbuflists blist;

    lck_mtx_lock(nfs_buf_mutex);
    if (!nfs_buf_iterprepare(np, &blist, NBI_CLEAN)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
    if (error != EAGAIN) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
    if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
    while ((bp = LIST_FIRST(&blist))) {
    LIST_REMOVE(bp, nb_vnbufs);
    LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
    while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
    if (error != EAGAIN) {
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
    nfs_buf_refrele(bp);
    nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
    lck_mtx_unlock(nfs_buf_mutex);
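/*
 * Illustrative sketch (not part of the original source): the buffer-list
 * walks above follow this file's usual iteration idiom, roughly:
 *
 *	lck_mtx_lock(nfs_buf_mutex);
 *	if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
 *		while ((bp = LIST_FIRST(&blist))) {
 *			LIST_REMOVE(bp, nb_vnbufs);
 *			LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
 *			// acquire bp and operate on it, dropping the
 *			// mutex around any blocking work
 *		}
 *		nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
 *	}
 *	lck_mtx_unlock(nfs_buf_mutex);
 */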
/*
 * Add an async I/O request to the mount's async I/O queue and make
 * sure that an nfsiod will service it.
 */
nfs_asyncio_finish(struct nfsreq *req)
    struct nfsmount *nmp;
    struct nfsiod *niod;

    FSDBG_TOP(552, nmp, 0, 0, 0);

    lck_mtx_lock(nfsiod_mutex);
    niod = nmp->nm_niod;

    /* grab an nfsiod if we don't have one already */
    niod = TAILQ_FIRST(&nfsiodfree);
    TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
    TAILQ_INSERT_TAIL(&nfsiodwork, niod, niod_link);
    niod->niod_nmp = nmp;
    } else if (((nfsiod_thread_count < NFSIOD_MAX) || (nfsiod_thread_count <= 0)) && (started < 4)) {
    /*
     * Try starting a new thread.
     * We may try a couple times if other callers
     * get the new threads before we do.
     */
    lck_mtx_unlock(nfsiod_mutex);
    if (!nfsiod_start()) {
    lck_mtx_lock(nfsiod_mutex);

    /*
     * If we got here while being on the resendq we need to get off. This
     * happens when the timer fires and errors out requests from nfs_sigintr
     * or we receive a reply (UDP case) while being on the resend queue so
     * we're just finishing up and are not going to be resent.
     */
    lck_mtx_lock(&req->r_mtx);
    if (req->r_flags & R_RESENDQ) {
    lck_mtx_lock(&nmp->nm_lock);
    if (req->r_rchain.tqe_next != NFSREQNOLIST) {
    NFS_BIO_DBG("Processing async request on resendq. Removing");
    TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
    req->r_rchain.tqe_next = NFSREQNOLIST;
    assert(req->r_refs > 1);
    /* Remove resendq reference */
    lck_mtx_unlock(&nmp->nm_lock);
    req->r_flags &= ~R_RESENDQ;
    lck_mtx_unlock(&req->r_mtx);

    if (req->r_achain.tqe_next == NFSREQNOLIST) {
    TAILQ_INSERT_TAIL(&nmp->nm_iodq, req, r_achain);

    /* If this mount doesn't already have an nfsiod working on it... */
    if (!nmp->nm_niod) {
    if (niod) { /* give it the nfsiod we just grabbed */
    nmp->nm_niod = niod;
    lck_mtx_unlock(nfsiod_mutex);
    } else if (nfsiod_thread_count > 0) {
    /* just queue it up on nfsiod mounts queue if needed */
    if (nmp->nm_iodlink.tqe_next == NFSNOLIST) {
    TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
    lck_mtx_unlock(nfsiod_mutex);
    printf("nfs_asyncio(): no nfsiods? %d %d (%d)\n", nfsiod_thread_count, NFSIOD_MAX, started);
    lck_mtx_unlock(nfsiod_mutex);
    /* we have no other option but to be persistent */
    lck_mtx_unlock(nfsiod_mutex);

    FSDBG_BOT(552, nmp, 0, 0, 0);
/*
 * queue up async I/O request for resend
 */
nfs_asyncio_resend(struct nfsreq *req)
    struct nfsmount *nmp = req->r_nmp;

    if (nfs_mount_gone(nmp)) {
    nfs_gss_clnt_rpcdone(req);
    lck_mtx_lock(&nmp->nm_lock);
    if (!(req->r_flags & R_RESENDQ)) {
    TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain);
    req->r_flags |= R_RESENDQ;
    /*
     * We take a reference on this request so that it can't be
     * destroyed while a resend is queued or in progress.
     */
    nfs_request_ref(req, 1);
    nfs_mount_sock_thread_wake(nmp);
    lck_mtx_unlock(&nmp->nm_lock);

/*
 * Read directory data into a buffer.
 *
 * Buffer will be filled (unless EOF is hit).
 * Buffers after this one may also be completely/partially filled.
 */
nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx)
    nfsnode_t np = bp->nb_np;
    struct nfsmount *nmp = NFSTONMP(np);

    if (nfs_mount_gone(nmp)) {
    if (nmp->nm_vers < NFS_VER4) {
    error = nfs3_readdir_rpc(np, bp, ctx);
    error = nfs4_readdir_rpc(np, bp, ctx);
    if (error && (error != NFSERR_DIRBUFDROPPED)) {
    SET(bp->nb_flags, NB_ERROR);
    bp->nb_error = error;