/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */
/*
 * Socket operations for use by nfs
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);

#if NFSSERVER
int	nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int	nfsrv_getstream(struct nfsrv_sock *, int);
static int	nfsrv_getreq(struct nfsrv_descript *);
extern int	nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */
#if NFSCLIENT

static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void *, int);
static void	nfs_tcp_rcv(socket_t, void *, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
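/*
 * Worked example (illustrative, not from the code): suppose the
 * getattr/lookup timer has settled at nm_srtt[0] == 64 (a smoothed
 * rtt A of 8 ticks, scaled by 8) and nm_sdrtt[0] == 16 (a deviation
 * D of 4 ticks, scaled by 4).  The t == 1 branch of NFS_RTO gives:
 *
 *	(((64 + 3) >> 2) + 16 + 1) >> 1  ==  (16 + 17) >> 1  ==  16 ticks
 *
 * which matches the "A+2D" legend above (8 + 2*4).  The t >= 3 branch,
 * ((64 + 7) >> 3) + 16 + 1 == 25, approximates A+4D for read/write.
 */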
/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
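/*
 * Scaling example (illustrative): NFS_CWNDSCALE is 256, so NFS_MAXCWND
 * (256 * 32) allows at most 32 outstanding RPCs.  Each request in
 * flight adds NFS_CWNDSCALE to nm_sent, and a request may be sent only
 * while nm_sent < nm_cwnd.  On a retransmit timeout nm_cwnd is halved
 * (floor NFS_CWNDSCALE == one request); on each reply received with a
 * full window, nm_cwnd grows by roughly (256*256)/nm_cwnd, e.g. by 16
 * when nm_cwnd == 4096 -- the "1/cwnd" additive increase described
 * above.  nfs_backoff[] supplies the retransmit backoff multipliers.
 */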
/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
		nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	so = nmp->nm_so;
	lck_mtx_lock(&nmp->nm_lock);
	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}
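	/*
	 * Example of the scan above (illustrative): the first bind is
	 * tried at port 1023 (IPPORT_RESERVED - 1); on EADDRINUSE the
	 * port is decremented and retried -- 1022, 1021, ... -- until a
	 * bind succeeds, a different error occurs, or the scan reaches
	 * IPPORT_RESERVED / 2 (512), at which point the error is returned.
	 */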
	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = EINVAL;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15)	/* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			/* abort if this is taking too long */
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}
	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}
	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}

	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);
	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
			current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error && status)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}
/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}
/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else
		lck_mtx_unlock(&nmp->nm_lock);
}
/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}
/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}

	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}
	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait)
				return (0);
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
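	/*
	 * Illustrative arithmetic for the window update above: a first
	 * transmit reserves one slot (nm_sent += 256); a retransmit
	 * instead halves nm_cwnd, e.g. 4096 -> 2048 -> 1024 -> ... with
	 * a floor of NFS_CWNDSCALE (256), throttling the mount to a
	 * single outstanding request under persistent timeouts.
	 */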
	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
		wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32 *)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}
/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */
973 nfs_udp_rcv(socket_t so
, void *arg
, __unused
int waitflag
)
975 struct nfsmount
*nmp
= arg
;
980 if (nmp
->nm_sockflags
& NMSOCK_CONNECTING
) {
985 /* make sure we're on the current socket */
986 if (nmp
->nm_so
!= so
)
992 error
= sock_receivembuf(so
, NULL
, &m
, MSG_DONTWAIT
, &rcvlen
);
994 nfs_request_match_reply(nmp
, m
);
995 } while (m
&& !error
);
997 if (error
&& (error
!= EWOULDBLOCK
)) {
998 /* problems with the socket... mark for reconnection */
999 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error
));
1000 nfs_need_reconnect(nmp
);
/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}
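	/*
	 * RPC record marker format (RFC 1831 record marking standard):
	 * each fragment is preceded by a 4-byte big-endian word whose
	 * high bit flags the last fragment and whose low 31 bits give
	 * the fragment length.  For example, a marker of 0x8000012c
	 * announces a final fragment of 0x12c (300) bytes; 0x0000012c
	 * would announce a 300-byte fragment with more to follow.
	 */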
	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		} else {
			m = NULL;
		}
		if (m) { /* match completed response with request */
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_request_match_reply(nmp, m);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* loop if we've been making error-free progress */
		if (recv)
			goto nextfrag;
	}

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}
/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32 *)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (req->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so coarse, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				if (proct[req->r_procnum] == 0)
					panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
				t1 = req->r_rtt + 1;
				t1 -= (NFS_SRTT(req) >> 3);
				NFS_SRTT(req) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(req) >> 2);
				NFS_SDRTT(req) += t1;
1254 lck_mtx_unlock(&nmp
->nm_lock
);
1255 /* signal anyone waiting on this request */
1257 asyncioq
= (req
->r_callback
.rcb_func
!= NULL
);
1258 if ((asyncgss
= ((req
->r_gss_ctx
!= NULL
) && ((req
->r_flags
& (R_ASYNC
|R_ASYNCWAIT
|R_ALLOCATED
)) == (R_ASYNC
|R_ALLOCATED
)))))
1259 nfs_request_ref(req
, 1);
1260 lck_mtx_unlock(&req
->r_mtx
);
1261 lck_mtx_unlock(nfs_request_mutex
);
1263 nfs_gss_clnt_rpcdone(req
);
1264 nfs_request_rele(req
);
1266 /* if it's an async RPC with a callback, queue it up */
1268 nfs_asyncio_finish(req
);
1273 /* not matched to a request, so drop it. */
1274 lck_mtx_unlock(nfs_request_mutex
);
1275 OSAddAtomic(1, (SInt32
*)&nfsstats
.rpcunexpected
);
/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				lck_mtx_lock(&req->r_mtx);
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}
/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
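/*
 * Sketch of the synchronous path (this is what nfs_request2() below
 * does; shown here for orientation, with error handling elided):
 *
 *	struct nfsreq rq, *req = &rq;
 *
 *	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req);
 *	error = nfs_request_add_header(req);
 *	error = nfs_request_send(req, 1);		// links into nfs_reqq
 *	nfs_request_wait(req);				// nfs_wait_reply()
 *	error = nfs_request_finish(req, nmrepp, status);
 *	nfs_request_rele(req);				// destroys on last ref
 */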
/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq *, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32 *)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}
/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}
/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}
/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32 *)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}
/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (req->r_flags & R_CWND) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * context.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;
			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32 *)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)");
				req->r_flags |= R_JBTPRINTFMSG;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						goto nfsmout;
					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
				} while (--delay > 0);
			}
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}

		if (req->r_flags & R_JBTPRINTFMSG)
			nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np)
			cache_purge(NFSTOV(req->r_np));
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		error = 0;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (!error && (req->r_flags & R_JBTPRINTFMSG))
		nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}
/*
 * Perform an NFS request synchronously.
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		0, nmrepp, xidp, status);
}

int
nfs_request2(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Create and start an asynchronous NFS request.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= R_ASYNC;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
/*
 * Wait for and finish an asynchronous NFS request.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}
/*
 * Flag a request as being terminated.
 */
static void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;

	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}
/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
static void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 */
static void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}
/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 */
static struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
2190 struct nfsmount
*nmp
;
2191 int timeo
, maxtime
, finish_asyncio
, error
;
2193 TAILQ_HEAD(nfs_mount_pokeq
, nfsmount
) nfs_mount_poke_queue
;
2195 lck_mtx_lock(nfs_request_mutex
);
2196 req
= TAILQ_FIRST(&nfs_reqq
);
2197 if (req
== NULL
) { /* no requests - turn timer off */
2198 nfs_request_timer_on
= 0;
2199 lck_mtx_unlock(nfs_request_mutex
);
2204 TAILQ_INIT(&nfs_mount_poke_queue
);
2207 for ( ; req
!= NULL
; req
= nfs_reqnext(req
)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!(nmp->nm_flag & NFSMNT_SOFT))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);
				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & NMSOCK_POKE)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
		if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else
				lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);
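
	/*
	 * The sockets to poke were collected on a local queue while
	 * nfs_request_mutex was held; they are poked only now, after the
	 * mutex has been dropped, so the socket layer is never entered
	 * with the request queue locked.
	 */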
	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		if (!(nmp->nm_state & NFSSTA_MOUNTED))
			wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}

/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */

	/*
	 * If we're in the progress of a force unmount and there's
	 * been a timeout, we're dead and fail IO.
	 */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & NFSSTA_FORCE) &&
	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		nmp->nm_flag |= NFSMNT_SOFT;
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/* If this thread belongs to kernel task; then abort check is not needed */
	if ((current_proc() != kernproc) && current_thread_aborted())
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if ((nmp->nm_flag & NFSMNT_INT) &&
	    proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
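
/*
 * Note the sleep above: the first wait may be interruptible (PCATCH),
 * but subsequent waits poll with a short timeout instead, and
 * "noremotehang" threads always sleep with a bounded timeout so they
 * cannot hang indefinitely on an unresponsive server.
 */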

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~NFSSTA_SNDLOCK;
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}

#endif /* NFSCLIENT */

#if NFSSERVER

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_long *tl;
	int err, error;
	struct nfsm_chain nmrep;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize)
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	else
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);  /* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
	return (0);
}
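
/*
 * The reply layout above follows the ONC RPC reply message (RFC 1831):
 * xid, REPLY, then either MSG_DENIED (auth error or RPC version
 * mismatch) or MSG_ACCEPTED with a verifier and an accept status
 * (success, program/procedure unavailable, version mismatch, or
 * garbage arguments).
 */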

/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;  /* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
		error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		do {
			bzero(&msg, sizeof(msg));
			msg.msg_name = (caddr_t)&nam;
			msg.msg_namelen = sizeof(nam);

			/*
			 * Do soreceive().
			 */
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
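
/*
 * On a stream socket, RPC messages are delimited by record marking
 * (RFC 1831): each fragment is preceded by a 4-byte big-endian word
 * whose low 31 bits give the fragment length and whose high bit is set
 * on the final fragment of a record.  For example, a marker of
 * 0x80000190 announces a last fragment of 0x190 (400) bytes.
 */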

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket.  The "waitflag" argument indicates whether or not it
 * can sleep.
 */
static int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
static int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_long nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val);		// RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val);		// RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val);		// RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);	// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type);	// Auth Flavor
	nfsm_chain_get_32(error, nmreq, len);		// Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}
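
/*
 * The AUTH_UNIX credential parsed above follows the RFC 1831 layout:
 * stamp, machine name, uid, gid, then a counted list of auxiliary gids
 * (at most RPCAUTH_UNIXGIDS), followed by the verifier, which is
 * expected to be AUTH_NONE.
 */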

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */

int
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}

void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int ostate, do_vfs_signal;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
	lck_mtx_unlock(&nmp->nm_lock);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
		do_vfs_signal = 0;
	else
		do_vfs_signal = !(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}

void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int ostate, state, do_vfs_signal;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
	state = nmp->nm_state;
	lck_mtx_unlock(&nmp->nm_lock);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
		do_vfs_signal = 0;
	else
		do_vfs_signal = (ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
			!(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}
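
/*
 * nfs_down()/nfs_up() signal VQ_NOTRESP only on state transitions:
 * down fires when the mount first enters a timeout state, and up fires
 * (with the "back up" flag set) only once all timeout states have
 * cleared, so the VFS event queue sees one event per outage.
 */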