/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */
/*
 * Socket operations for use by nfs
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);

#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */
static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void *, int);
static void	nfs_tcp_rcv(socket_t, void *, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);
#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif
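/*
 * Note the double parentheses at NFS_SOCK_DBG() call sites throughout
 * this file: the macro's single argument is a complete printf()
 * argument list, e.g.
 *
 *	NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
 *		req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
 */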
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
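/*
 * Worked example (illustrative numbers, not from a real trace): for a
 * "getattr"-class timer (t == 1) with a scaled mean nm_srtt[0] == 40
 * and mean deviation nm_sdrtt[0] == 8, NFS_RTO yields
 * ((((40 + 3) >> 2) + 8 + 1) >> 1) == (10 + 9) >> 1 == 9 ticks.
 */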
/*
 * Defines which timer to use for the procnum.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
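/*
 * Illustrative sketch (not part of the build): the AIMD policy the
 * defines above support, as applied later in this file -- nfs_send()
 * halves nm_cwnd on retransmit, nfs_request_match_reply() grows it by
 * roughly one rpc per round trip.  The helper name is hypothetical.
 */
#if 0
static void
nfs_cwnd_example(struct nfsmount *nmp, int retransmit)
{
	if (retransmit) {
		/* multiplicative decrease: halve, but keep at least one rpc */
		nmp->nm_cwnd >>= 1;
		if (nmp->nm_cwnd < NFS_CWNDSCALE)
			nmp->nm_cwnd = NFS_CWNDSCALE;
	} else if (nmp->nm_cwnd <= nmp->nm_sent) {
		/* additive increase: about 1/cwnd per reply, scaled for integer math */
		nmp->nm_cwnd += ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
		    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
		if (nmp->nm_cwnd > NFS_MAXCWND)
			nmp->nm_cwnd = NFS_MAXCWND;
	}
}
#endif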
/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
		nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	so = nmp->nm_so;
	lck_mtx_lock(&nmp->nm_lock);

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		int tport;
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15) /* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			/* abort if this is taking too long */
			if (tocnt > 60) {
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}

	/*
	 * Set socket send/receive timeouts:
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}
/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}
/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}
/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
	/* NOTREACHED */
}
/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}
/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}

	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			if (error)
				return (error);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}
/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
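/*
 * Sketch (not compiled): how the TCP RPC record mark parsed above
 * decodes.  The 4-byte big-endian marker carries the fragment length
 * in the low 31 bits; the high bit flags the record's last fragment.
 * The helper name is hypothetical.
 */
#if 0
static void
rpc_record_mark_example(uint32_t marker_be)
{
	uint32_t marker = ntohl(marker_be);		/* wire order -> host order */
	int lastfrag = (marker & 0x80000000) != 0;	/* last fragment of record? */
	uint32_t fraglen = marker & ~0x80000000;	/* fragment length in bytes */
	printf("fragment of %u bytes%s\n", fraglen, lastfrag ? " (last)" : "");
}
#endif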
/*
 * "poke" a socket to try to provoke any pending errors
 */
void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}
/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		return;
	}

	/* not matched to a request, so drop it. */
	lck_mtx_unlock(nfs_request_mutex);
	OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
	mbuf_freem(mrep);
}
/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}
/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
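/*
 * Sketch (not compiled): the life cycle above as a synchronous caller
 * drives it -- essentially what nfs_request2() below does.  The
 * function and variable names here are illustrative only.
 */
#if 0
static int
nfs_request_lifecycle_example(nfsnode_t np, mount_t mp, struct nfsm_chain *nmreq,
	int procnum, thread_t thd, kauth_cred_t cred,
	struct nfsm_chain *nmrep, int *status)
{
	struct nfsreq *req = NULL;
	int error;

	if ((error = nfs_request_create(np, mp, nmreq, procnum, thd, cred, &req)))
		return (error);
	if (!(error = nfs_request_add_header(req)) &&	/* RPC + auth header */
	    !(error = nfs_request_send(req, 1))) {	/* queue and transmit */
		nfs_request_wait(req);			/* wait for the reply */
		error = nfs_request_finish(req, nmrep, status);
	}
	nfs_request_rele(req);				/* drop ref; may destroy */
	return (error);
}
#endif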
/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq *, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}
/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}
/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}
/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}
/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (req->r_flags & R_CWND) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;
			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)");
				req->r_flags |= R_JBTPRINTFMSG;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						return (error);
					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
				} while (--delay > 0);
			}
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			req->r_start = 0;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}

		if (req->r_flags & R_JBTPRINTFMSG)
			nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np)
			cache_purge(NFSTOV(req->r_np));
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (!error && (req->r_flags & R_JBTPRINTFMSG))
		nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}
/*
 * Perform an NFS request synchronously.
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		0, nmrepp, xidp, status);
}
int
nfs_request2(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Create and start an asynchronous NFS request.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	int error, sent = 0;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= R_ASYNC;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
/*
 * Wait for and finish an asynchronous NFS request.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error, asyncio = req->r_callback.rcb_func ? 1 : 0;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ)  /* wait until the request is off the resend queue */
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", NULL);
	lck_mtx_unlock(&req->r_mtx);

	nfs_request_wait(req);
	error = nfs_request_finish(req, nmrepp, status);

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}
/*
 * Flag a request as being terminated.
 */
static void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;

	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}
/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
static void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 */
static void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}
/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 */
static struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!(nmp->nm_flag & NFSMNT_SOFT))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & NMSOCK_POKE)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
		if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else
				lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		if (!(nmp->nm_state & NFSSTA_MOUNTED))
			wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}

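/*
 * Note that the socket pokes are gathered on a local queue and performed
 * only after nfs_request_mutex has been dropped, presumably because the
 * poke may block; NMSOCK_POKE keeps a mount from being queued twice.
 * The timer re-arms itself via nfs_interval_timer_start() while requests
 * remain queued and is switched off above when the queue is empty.
 */
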
/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */

	/*
	 * If we're in the progress of a force unmount and there's
	 * been a timeout, we're dead and fail IO.
	 */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & NFSSTA_FORCE) &&
	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		nmp->nm_flag |= NFSMNT_SOFT;
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/* If this thread belongs to kernel task; then abort check is not needed */
	if ((current_proc() != kernproc) && current_thread_aborted())
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if ((nmp->nm_flag & NFSMNT_INT) &&
	    proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

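/*
 * Note: nfs_sigintr() may be called with or without nm_lock held, hence
 * the nmplocked argument above.  The checks run roughly from cheapest to
 * most invasive: terminated request, forced unmount, "noremotehang",
 * aborted thread, and finally pending signals on interruptible mounts.
 */
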
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}

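/*
 * Sleep behavior above: only the first wait on an interruptible
 * (NFSMNT_INT) mount uses PCATCH; after that the sleep falls back to a
 * short timed wait, so the loop still returns regularly to re-check
 * nfs_sigintr() even when signals are not being caught.
 */
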
/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~NFSSTA_SNDLOCK;
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}

#endif /* NFSCLIENT */

#if NFSSERVER

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_long *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
	return (0);
}

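/*
 * Reply layout produced above (per RFC 1831): xid, REPLY, then either
 * MSG_DENIED (RPC mismatch or auth error) or MSG_ACCEPTED carrying a
 * verifier (RPCSEC_GSS or AUTH_NULL), the accept status, and, for
 * successful non-void calls, the NFS status word.
 */
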
/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;	/* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
		error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}

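/*
 * Error policy above: EWOULDBLOCK on a stream socket is promoted to
 * EPIPE so the connection gets torn down, while socket errors outside
 * the short fatal list are swallowed, apparently on the assumption that
 * the client's retransmission will recover.
 */
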
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage	nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		do {
			bzero(&msg, sizeof(msg));
			msg.msg_name = (caddr_t)&nam;
			msg.msg_namelen = sizeof(nam);
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}

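/*
 * Locking note: nfsrv_rcv_locked() is entered with ns_rwlock held
 * exclusively.  When called with MBUF_DONTWAIT (i.e. from the socket
 * upcall) it drops the lock at "dorecs" before waking an nfsd; a
 * MBUF_WAITOK caller (an nfsd itself) keeps the lock.
 */
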
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
static int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}

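/*
 * The record marks parsed above follow the RFC 1831 record marking
 * standard: each fragment of an RPC record on a stream transport is
 * preceded by a 4-byte mark whose low 31 bits give the fragment length
 * and whose high bit, when set, flags the final fragment (tracked here
 * as SLP_LASTFRAG).  Fragments accumulate on ns_frag until the last one
 * promotes the completed record onto the ns_rec queue.
 */
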
/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;

	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
static int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_long nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid);	// XID
	nfsm_chain_get_32(error, nmreq, val);		// RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val);	// RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val);	// RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);	// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);	// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type);	// Auth Flavor
	nfsm_chain_get_32(error, nmreq, len);		// Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}

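/*
 * AUTH_UNIX credential layout parsed above: stamp, machine name, uid,
 * gid, up to RPCAUTH_UNIXGIDS supplementary gids, then the (expected
 * AUTH_NONE) verifier.  Only the first NGROUPS gids are kept, and the
 * list is sorted before kauth_cred_create() builds the real credential.
 */
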
/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */

static void
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
}

void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int ostate;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
	lck_mtx_unlock(&nmp->nm_lock);

	if (!(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}

void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int ostate, state;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
	state = nmp->nm_state;
	lck_mtx_unlock(&nmp->nm_lock);

	if ((ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
	    !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}

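/*
 * nfs_down()/nfs_up() above are symmetric: nfs_down() raises the
 * NFSSTA_*TIMEO state bits and posts VQ_NOTRESP when the first timeout
 * condition appears, while nfs_up() clears them and posts the matching
 * VQ_NOTRESP "up" event (final argument 1) only once all of the timeout
 * conditions have cleared.
 */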