/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */
/*
 * Socket operations for use by nfs
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);

#if NFSSERVER
int	nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int	nfsrv_getstream(struct nfsrv_sock *, int);
static int	nfsrv_getreq(struct nfsrv_descript *);
extern int	nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */
static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void *, int);
static void	nfs_tcp_rcv(socket_t, void *, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that a timer estimate would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
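/*
 * Illustrative arithmetic (added; not from the original source): the timers
 * are kept scaled (srtt by 8, sdrtt by 4), so for a class-1 RPC with
 * nm_srtt[0] == 16 (~2 ticks) and nm_sdrtt[0] == 4 (~1 tick),
 *   NFS_RTO = ((((16 + 3) >> 2) + 4 + 1) >> 1) = (4 + 5) >> 1 = 4 ticks,
 * which is roughly the A+2D estimate described above.
 */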
/*
 * Defines which timer to use for the procnum.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point.  The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88".  ACM, August 1988.
 * describes for TCP.  The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
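/*
 * Worked example (added for illustration; not in the original source): with
 * NFS_CWNDSCALE 256, a window of NFS_MAXCWND/2 (4096) admits 16 requests in
 * flight, since nm_sent advances by 256 per send.  The additive increase in
 * nfs_request_match_reply(),
 *   cwnd += ((NFS_CWNDSCALE * NFS_CWNDSCALE) + (cwnd >> 1)) / cwnd,
 * then adds 256*256/4096 = 16 to a full 4096-unit window -- one request's
 * worth per round trip of replies, the 1/cwnd rule described above.
 */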
/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	sock_upcall upcall;
	int error, on = 1, proto, tport;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
		nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;
	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}
	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15) /* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			/* abort if this is taking too long */
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			error = msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
			if (error == EWOULDBLOCK)
				error = 0;
			if (error)
				break;
		}
		log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
			error ? "aborted" : "completed",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}
	/*
	 * Set socket send/receive timeouts:
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}
	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);
	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error)
		nmp->nm_sockflags |= NMSOCK_READY;
	wakeup(&nmp->nm_sockflags);
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error && status)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}
/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}
/*
 * NFS disconnect.  Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}
/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) &&
		       ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}
/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = PSOCK, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			error = msleep(req, &nmp->nm_lock, slpflag, "nfsconnectwait", &ts);
			if (error == EWOULDBLOCK)
				error = 0;
			if ((error == EINTR) || (error == ERESTART))
				break;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}
	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait)
				return (0);
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				error = msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
				if ((error == EINTR) || (error == ERESTART))
					break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			if ((error == EINTR) || (error == ERESTART))
				return (error);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);
	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		lck_mtx_unlock(&req->r_mtx);
		nfs_sndunlock(req);
		return (0);
	}
	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);
	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}
	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE: case EADDRNOTAVAIL: case ENETDOWN: case ENETUNREACH:
	case ENETRESET: case ECONNABORTED: case ECONNRESET: case ENOTCONN:
	case ESHUTDOWN: case ECONNREFUSED: case EHOSTDOWN: case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}
	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}
/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
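/*
 * Illustration (added; not in the original source): the 4-byte TCP record
 * marker parsed above packs a last-fragment flag into the top bit and a
 * 31-bit fragment length into the rest.  A marker of 0x8000001c therefore
 * means "final fragment, 28 bytes follow" -- ntohl() first, then test and
 * clear 0x80000000, leaving nm_fragleft == 28.
 */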
1147 * "poke" a socket to try to provoke any pending errors
1150 nfs_sock_poke(struct nfsmount
*nmp
)
1152 struct iovec_32 aio
;
1158 lck_mtx_lock(&nmp
->nm_lock
);
1159 if ((nmp
->nm_sockflags
& NMSOCK_UNMOUNT
) || !nmp
->nm_so
) {
1160 lck_mtx_unlock(&nmp
->nm_lock
);
1163 lck_mtx_unlock(&nmp
->nm_lock
);
1164 aio
.iov_base
= (uintptr_t)&dummy
;
1167 bzero(&msg
, sizeof(msg
));
1168 msg
.msg_iov
= (struct iovec
*) &aio
;
1170 error
= sock_send(nmp
->nm_so
, &msg
, MSG_DONTWAIT
, &len
);
1171 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error
));
/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		return;
	}

	/* not matched to a request, so drop it. */
	lck_mtx_unlock(nfs_request_mutex);
	OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
	mbuf_freem(mrep);
}
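/*
 * Worked example for the smoothed-rtt update above (illustrative; not from
 * the original source): with NFS_SRTT kept scaled by 8, a measured r_rtt
 * of 2 ticks against NFS_SRTT == 16 gives t1 = 3 - (16 >> 3) = 1, so
 * NFS_SRTT becomes 17 -- the estimate moves 1/8 of the way toward the new
 * sample, the 0.125 gain described above.
 */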
/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		error = msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		if (error == EWOULDBLOCK)
			error = 0;
		if ((error == EINTR) || (error == ERESTART))
			break;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}
/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
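/*
 * For illustration (added; it mirrors nfs_request2() below): a synchronous
 * caller simply runs the stages in order, looping while the request is
 * marked R_RESTART:
 *
 *	error = nfs_request_create(np, mp, &nmreq, procnum, thd, cred, &req);
 *	do {
 *		error = nfs_request_add_header(req);
 *		if (!error)
 *			error = nfs_request_send(req, 1);
 *		if (!error)
 *			nfs_request_wait(req);
 *		if (!error)
 *			error = nfs_request_finish(req, &nmrep, &status);
 *	} while (req->r_flags & R_RESTART);
 *	nfs_request_rele(req);
 */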
/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}
/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}
/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
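/*
 * Worked example for the AUTH_UNIX sizing above (illustrative; not from
 * the original source): a credential with cr_ngroups == 5 on a mount
 * allowing nm_numgrps >= 4 supplementary groups yields
 *   auth_len = (4 << 2) + 5 * NFSX_UNSIGNED = 16 + 20 = 36 bytes,
 * i.e. four 4-byte gids plus five fixed 4-byte fields of the credential
 * body.
 */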
/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}
/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (req->r_flags & R_CWND) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;
			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)");
				req->r_flags |= R_JBTPRINTFMSG;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						return (error);
					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
				} while (--delay > 0);
			}
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}

		if (req->r_flags & R_JBTPRINTFMSG)
			nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np)
			cache_purge(NFSTOV(req->r_np));
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		error = 0;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (!error && (req->r_flags & R_JBTPRINTFMSG))
		nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}
/*
 * Perform an NFS request synchronously.
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		0, nmrepp, xidp, status);
}
int
nfs_request2(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Create and start an asynchronous NFS request.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= R_ASYNC;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!error && !(req->r_flags & R_SENT)) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				error = msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				if (error == EWOULDBLOCK)
					error = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
/*
 * Wait for and finish an asynchronous NFS request.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error, asyncio = req->r_callback.rcb_func ? 1 : 0;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ)  /* wait until the request is off the resend queue */
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", NULL);
	lck_mtx_unlock(&req->r_mtx);

	nfs_request_wait(req);
	error = nfs_request_finish(req, nmrepp, status);

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}
/*
 * Flag a request as being terminated.
 */
static void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;

	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}
/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
static void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 */
static void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}

/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 */
static struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}
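/*
 * Illustrative traversal (added; it mirrors nfs_request_timer() below):
 * the timer walks the request queue hand-over-hand, always holding one
 * busied request so the entry can't be freed out from under it:
 *
 *	req = TAILQ_FIRST(&nfs_reqq);
 *	nfs_reqbusy(req);
 *	for ( ; req != NULL ; req = nfs_reqnext(req))
 *		... examine req ...
 */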
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);
2235 if (nmp
->nm_tprintf_initial_delay
&&
2236 ((req
->r_rexmit
> 2) || (req
->r_flags
& R_RESENDERR
)) &&
2237 ((req
->r_lastmsg
+ nmp
->nm_tprintf_delay
) < now
.tv_sec
)) {
2238 req
->r_lastmsg
= now
.tv_sec
;
2239 nfs_down(req
->r_nmp
, req
->r_thread
, 0, NFSSTA_TIMEO
,
2241 req
->r_flags
|= R_TPRINTFMSG
;
2242 lck_mtx_lock(&nmp
->nm_lock
);
2243 if (!(nmp
->nm_state
& NFSSTA_MOUNTED
)) {
2244 lck_mtx_unlock(&nmp
->nm_lock
);
2245 /* we're not yet completely mounted and */
2246 /* we can't complete an RPC, so we fail */
2247 OSAddAtomic(1, (SInt32
*)&nfsstats
.rpctimeouts
);
2249 finish_asyncio
= ((req
->r_callback
.rcb_func
!= NULL
) && !(req
->r_flags
& R_WAITSENT
));
2251 lck_mtx_unlock(&req
->r_mtx
);
2253 nfs_asyncio_finish(req
);
2256 lck_mtx_unlock(&nmp
->nm_lock
);
2260 * Put a reasonable limit on the maximum timeout,
2261 * and reduce that limit when soft mounts get timeouts or are in reconnect.
2263 if (!(nmp
->nm_flag
& NFSMNT_SOFT
))
2264 maxtime
= NFS_MAXTIMEO
;
2265 else if ((req
->r_flags
& R_SETUP
) || ((nmp
->nm_reconnect_start
<= 0) || ((now
.tv_sec
- nmp
->nm_reconnect_start
) < 8)))
2266 maxtime
= (NFS_MAXTIMEO
/ (nmp
->nm_timeouts
+1))/2;
2268 maxtime
= NFS_MINTIMEO
/4;
2271 * Check for request timeout.
2273 if (req
->r_rtt
>= 0) {
2275 lck_mtx_lock(&nmp
->nm_lock
);
2276 if (req
->r_flags
& R_RESENDERR
) {
2277 /* with resend errors, retry every few seconds */
2280 if (req
->r_procnum
== NFSPROC_NULL
&& req
->r_gss_ctx
!= NULL
)
2281 timeo
= NFS_MINIDEMTIMEO
; // gss context setup
2282 else if (nmp
->nm_flag
& NFSMNT_DUMBTIMR
)
2283 timeo
= nmp
->nm_timeo
;
2285 timeo
= NFS_RTO(nmp
, proct
[req
->r_procnum
]);
2287 /* ensure 62.5 ms floor */
2288 while (16 * timeo
< hz
)
2290 if (nmp
->nm_timeouts
> 0)
2291 timeo
*= nfs_backoff
[nmp
->nm_timeouts
- 1];
2293 /* limit timeout to max */
2294 if (timeo
> maxtime
)
2296 if (req
->r_rtt
<= timeo
) {
2297 lck_mtx_unlock(&nmp
->nm_lock
);
2298 lck_mtx_unlock(&req
->r_mtx
);
2301 /* The request has timed out */
2302 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
2303 req
->r_procnum
, proct
[req
->r_procnum
],
2304 req
->r_xid
, req
->r_rtt
, timeo
, nmp
->nm_timeouts
,
2305 (now
.tv_sec
- req
->r_start
)*NFS_HZ
, maxtime
));
2306 if (nmp
->nm_timeouts
< 8)
2308 /* if it's been a few seconds, try poking the socket */
2309 if ((nmp
->nm_sotype
== SOCK_STREAM
) &&
2310 ((now
.tv_sec
- req
->r_start
) >= 3) &&
2311 !(nmp
->nm_sockflags
& NMSOCK_POKE
)) {
2312 nmp
->nm_sockflags
|= NMSOCK_POKE
;
2313 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue
, nmp
, nm_pokeq
);
2315 lck_mtx_unlock(&nmp
->nm_lock
);
2318 /* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
2319 if (((nmp
->nm_flag
& NFSMNT_SOFT
) || (req
->r_flags
& R_SETUP
)) &&
2320 ((req
->r_rexmit
>= req
->r_retry
) || /* too many */
2321 ((now
.tv_sec
- req
->r_start
)*NFS_HZ
> maxtime
))) { /* too long */
2322 OSAddAtomic(1, (SInt32
*)&nfsstats
.rpctimeouts
);
2323 lck_mtx_lock(&nmp
->nm_lock
);
2324 if (!(nmp
->nm_state
& NFSSTA_TIMEO
)) {
2325 lck_mtx_unlock(&nmp
->nm_lock
);
2326 /* make sure we note the unresponsive server */
2327 /* (maxtime may be less than tprintf delay) */
2328 nfs_down(req
->r_nmp
, req
->r_thread
, 0, NFSSTA_TIMEO
,
2330 req
->r_lastmsg
= now
.tv_sec
;
2331 req
->r_flags
|= R_TPRINTFMSG
;
2333 lck_mtx_unlock(&nmp
->nm_lock
);
2335 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
2336 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
,
2337 now
.tv_sec
- req
->r_start
));
2339 finish_asyncio
= ((req
->r_callback
.rcb_func
!= NULL
) && !(req
->r_flags
& R_WAITSENT
));
2341 lck_mtx_unlock(&req
->r_mtx
);
2343 nfs_asyncio_finish(req
);
2347 /* for TCP, only resend if explicitly requested */
2348 if ((nmp
->nm_sotype
== SOCK_STREAM
) && !(req
->r_flags
& R_MUSTRESEND
)) {
2349 if (++req
->r_rexmit
> NFS_MAXREXMIT
)
2350 req
->r_rexmit
= NFS_MAXREXMIT
;
2352 lck_mtx_unlock(&req
->r_mtx
);
2357 * The request needs to be (re)sent. Kick the requester to resend it.
2358 * (unless it's already marked as needing a resend)
2360 if ((req
->r_flags
& R_MUSTRESEND
) && (req
->r_rtt
== -1)) {
2361 lck_mtx_unlock(&req
->r_mtx
);
2364 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
2365 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
));
2366 req
->r_flags
|= R_MUSTRESEND
;
2369 if ((req
->r_flags
& (R_ASYNC
|R_ASYNCWAIT
)) == R_ASYNC
)
2370 nfs_asyncio_resend(req
);
2371 lck_mtx_unlock(&req
->r_mtx
);
2374 lck_mtx_unlock(nfs_request_mutex
);
2376 /* poke any sockets */
2377 while ((nmp
= TAILQ_FIRST(&nfs_mount_poke_queue
))) {
2378 TAILQ_REMOVE(&nfs_mount_poke_queue
, nmp
, nm_pokeq
);
2380 lck_mtx_lock(&nmp
->nm_lock
);
2381 nmp
->nm_sockflags
&= ~NMSOCK_POKE
;
2382 if (!(nmp
->nm_state
& NFSSTA_MOUNTED
))
2383 wakeup(&nmp
->nm_sockflags
);
2384 lck_mtx_unlock(&nmp
->nm_lock
);
2387 nfs_interval_timer_start(nfs_request_timer_call
, NFS_REQUESTDELAY
);
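
/*
 * Worked example of the retransmit clock above (illustrative numbers,
 * assuming hz = 100): a base timeo of 3 ticks doubles until 16*timeo >= hz
 * (3 -> 6 -> 12, i.e. at least ~62.5 ms), is then multiplied by
 * nfs_backoff[nm_timeouts - 1] once timeouts accumulate, and is finally
 * clamped to maxtime.  Successive timeouts therefore stretch the retry
 * interval geometrically instead of hammering an unresponsive server.
 */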
/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}
/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */

	/*
	 * If we're in the progress of a force unmount and there's
	 * been a timeout, we're dead and fail IO.
	 */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & NFSSTA_FORCE) &&
	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		nmp->nm_flag |= NFSMNT_SOFT;
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/* If this thread belongs to kernel task; then abort check is not needed */
	if ((current_proc() != kernproc) && current_thread_aborted())
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if ((nmp->nm_flag & NFSMNT_INT) &&
	    proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
		return (EINTR);

	return (0);
}
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	while (!error && (*statep & NFSSTA_SNDLOCK)) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		error = msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (error == EWOULDBLOCK)
			error = 0;
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
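
/*
 * Design note (explanatory): the send lock sleeps with PCATCH only on the
 * first pass of an interruptible mount, then drops to a plain 2 second
 * poll; combined with the 1 second bound used when "noremotehang" is set,
 * a signalled or no-hang thread won't block indefinitely behind another
 * sender.
 */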
/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~NFSSTA_SNDLOCK;
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}

#endif /* NFSCLIENT */

#if NFSSERVER
/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_long *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, (SInt32 *)&nfsstats.srvrpc_errs);
	return (0);
}
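
/*
 * Shape of the reply stream built above, for reference (standard ONC RPC
 * reply framing per RFC 1831, summarized rather than quoted):
 *
 *	xid
 *	REPLY
 *	MSG_ACCEPTED | MSG_DENIED
 *	    denied:   AUTH_ERROR <why>  or  RPC_MISMATCH <low> <high>
 *	    accepted: <verifier flavor, length> <accept status> [results]
 */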
/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;  /* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
		error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}
void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		do {
			bzero(&msg, sizeof(msg));
			msg.msg_name = (caddr_t)&nam;
			msg.msg_namelen = sizeof(nam);

			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
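
/*
 * Note on the waitflag protocol (explanatory): socket upcalls arrive with
 * MBUF_DONTWAIT and must not block, so anything that can't be done
 * non-blocking just sets SLP_NEEDQ/SLP_DISCONN and leaves the rest for an
 * nfsd calling back in with MBUF_WAITOK.  That is also why ns_rwlock is
 * dropped on the MBUF_DONTWAIT path above before an nfsd is woken.
 */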
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
static int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
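
/*
 * RPC record marking on stream sockets, for illustration (per the record
 * marking standard in RFC 1831; the value below is an example, not taken
 * from this file): each fragment is preceded by a 4-byte big-endian marker
 * whose high bit flags the last fragment and whose low 31 bits give the
 * fragment length.
 *
 *	recmark = ntohl(recmark);
 *	len  = recmark & ~0x80000000;	// e.g. 0x8000001c -> 28 byte fragment
 *	last = recmark &  0x80000000;	// ... and it's the final one
 */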
/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
static int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_long nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val);		// RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val);	// RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val);	// RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type);	// Auth Flavor
	nfsm_chain_get_32(error, nmreq, len);		// Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}
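
/*
 * Wire layout parsed above, for reference (classic AUTH_UNIX credential
 * body, summarized): stamp, machinename (bounded here by NFS_MAXNAMLEN),
 * uid, gid, a count of extra gids (bounded by RPCAUTH_UNIXGIDS), then the
 * verifier flavor and opaque length.  That ordering is why the parser
 * skips the stamp and hostname, reads uid/gid, bounds the gid list, and
 * finally skips the verifier.
 */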
/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */
int
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}
void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int ostate;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
	lck_mtx_unlock(&nmp->nm_lock);

	if (!(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}
void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int ostate, state;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
	state = nmp->nm_state;
	lck_mtx_unlock(&nmp->nm_lock);

	if ((ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
	    !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}
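
/*
 * Pairing note (explanatory): nfs_down() posts VQ_NOTRESP with val 0 only
 * on the first transition into any of the *TIMEO states, and nfs_up()
 * posts VQ_NOTRESP with val 1 only when the last of them clears, so user
 * space sees exactly one "not responding"/"OK again" transition per outage
 * no matter how many timeout flavors fired.
 */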