/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <libkern/OSAtomic.h>

#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);

#if NFSSERVER
int	nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

int	nfsrv_getstream(struct nfsrv_sock *, int);
int	nfsrv_getreq(struct nfsrv_descript *);
extern int	nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */
int	nfs_reconnect(struct nfsmount *);
int	nfs_connect_setup(struct nfsmount *);
void	nfs_mount_sock_thread(void *, wait_result_t);
void	nfs_udp_rcv(socket_t, void *, int);
void	nfs_tcp_rcv(socket_t, void *, int);
void	nfs_sock_poke(struct nfsmount *);
void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
void	nfs_reqdequeue(struct nfsreq *);
void	nfs_reqbusy(struct nfsreq *);
struct nfsreq *nfs_reqnext(struct nfsreq *);
int	nfs_wait_reply(struct nfsreq *);
void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
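/*
 * Note: nm_srtt[] is effectively kept scaled by 8 and nm_sdrtt[] by 4, so
 * for the frequent rpcs ((srtt >> 2) + sdrtt) >> 1 comes out to roughly
 * A + 2D in unscaled terms, and (srtt >> 3) + sdrtt to roughly A + 4D,
 * matching the per-class estimates listed above.
 */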
/*
 * Defines which timer to use for the procnum.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
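/*
 * For illustration: with NFS_CWNDSCALE 256 and NFS_MAXCWND (256 * 32), the
 * window admits at most 32 outstanding rpcs.  Each send charges
 * NFS_CWNDSCALE against nm_sent, a retransmit timeout halves nm_cwnd (with
 * a floor of NFS_CWNDSCALE, i.e. one rpc), and nfs_backoff[] supplies the
 * escalating retransmit multipliers.
 */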
/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp, int verbose)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
		nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		int portrange = IP_PORTRANGE_LOW;
		error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
		if (!error) {	/* bind now to check for failure */
			sin.sin_len = sizeof (struct sockaddr_in);
			sin.sin_family = AF_INET;
			sin.sin_addr.s_addr = INADDR_ANY;
			sin.sin_port = 0;
			error = sock_bind(so, (struct sockaddr *) &sin);
		}
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}
	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 1, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			nfs_mount_check_dead_timeout(nmp);
			if ((tocnt++ == 30) && verbose)	/* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			/* abort if this is taking too long or we're unmounting */
			if ((tocnt > 120) || (nmp->nm_sockflags & NMSOCK_UNMOUNT)) {
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if ((tocnt > 30) && verbose)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}
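	/*
	 * (The connect above is issued non-blocking and then polled: each
	 * pass naps up to a second in msleep, checks SO_ERROR for an
	 * asynchronous failure, and gives up after ~120 passes or once an
	 * unmount begins.)
	 */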
	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}
	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);
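	/*
	 * (SO_UPCALLCLOSEWAIT, the "playin' it safe" option above, makes a
	 * later sock_close() wait for any in-progress socket upcall to
	 * drain, so nfs_udp_rcv/nfs_tcp_rcv can't race the close.)
	 */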
	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
/* setup & confirm socket connection is functional */
int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
		if (error)
			return (error);
		error = nfs4_renew(nmp, R_SETUP);
		if ((error == NFSERR_ADMIN_REVOKED) ||
		    (error == NFSERR_EXPIRED) ||
		    (error == NFSERR_LEASE_MOVED) ||
		    (error == NFSERR_STALE_CLIENTID)) {
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_state |= NFSSTA_RECOVER;
			lck_mtx_unlock(&nmp->nm_lock);
		}
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error && status)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}
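/*
 * (The NULL procedure used above is the standard zero-argument RPC ping:
 * it exercises the full request/reply path without touching any file
 * state, so a successful round trip confirms the transport is usable.)
 */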
/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, wentdown = 0, verbose = 1;
	time_t lastmsg;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp, verbose))) {
		verbose = 0;
		nfs_disconnect(nmp);
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		nfs_mount_check_dead_timeout(nmp);
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}
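/*
 * (tsleep on lbolt above wakes roughly once per second, so a broken
 * server is retried at about 1Hz until the connect succeeds, the request
 * is interrupted, or the mount goes away.)
 */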
/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}
/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;
	nfsnode_t np;
	fhandle_t fh;
	nfs_stateid dstateid;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
	       nmp->nm_deadto_start ||
	       ((nmp->nm_vers >= NFS_VER4) &&
	        ((nmp->nm_state & NFSSTA_RECOVER) || !TAILQ_EMPTY(&nmp->nm_recallq)))) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if (nfs_reconnect(nmp) == 0)
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		if ((nmp->nm_sockflags & NMSOCK_READY) &&
		    (nmp->nm_state & NFSSTA_RECOVER) &&
		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && !force) {
			/* perform state recovery */
			lck_mtx_unlock(&nmp->nm_lock);
			nfs4_recover(nmp);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* handle NFSv4 delegation recalls */
		while ((nmp->nm_vers >= NFS_VER4) && !force &&
		       (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
		       ((np = TAILQ_FIRST(&nmp->nm_recallq)))) {
			TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink);
			np->n_dlink.tqe_next = NFSNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&np->n_openlock);
			dstateid = np->n_dstateid;
			if (np->n_openflags & N_DELEG_MASK) {
				fh.fh_len = np->n_fhsize;
				bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
				np->n_openflags &= ~N_DELEG_MASK;
				lck_mtx_unlock(&np->n_openlock);
				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, nmp->nm_mcred);
			} else {
				lck_mtx_unlock(&np->n_openlock);
			}
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || force) &&
		       ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				req->r_flags |= R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_flags & R_RESENDQ)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_deadto_start)
			nfs_mount_check_dead_timeout(nmp);
		if (force || (nmp->nm_state & NFSSTA_DEAD))
			break;
		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & NFSSTA_RECOVER)) {
			if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
			    (nmp->nm_state & NFSSTA_RECOVER))
				ts.tv_sec = 1;
			else
				ts.tv_sec = 30;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	/* If we're unmounting, send the unmount RPC, if requested/appropriate. */
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && (nmp->nm_flag & NFSMNT_CALLUMNT) &&
	    (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
		lck_mtx_unlock(&nmp->nm_lock);
		nfs3_umount_rpc(nmp, vfs_context_kernel(),
			(nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
		lck_mtx_lock(&nmp->nm_lock);
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}
/*
 * Check if we should mark the mount dead because the
 * unresponsive mount has reached the dead timeout.
 * (must be called with nmp locked)
 */
void
nfs_mount_check_dead_timeout(struct nfsmount *nmp)
{
	struct timeval now;

	if (!(nmp->nm_flag & NFSMNT_DEADTIMEOUT))
		return;
	if (nmp->nm_deadto_start == 0)
		return;
	if (nmp->nm_state & NFSSTA_DEAD)
		return;
	microuptime(&now);
	if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_deadtimeout)
		return;
	printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
	nmp->nm_state |= NFSSTA_DEAD;
	vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
}
/*
 * RPC record marker parsing state
 */
struct nfs_rpc_record_state
{
	uint16_t	nrrs_lastfrag;		/* last fragment of record */
	uint16_t	nrrs_markerleft;	/* marker bytes remaining */
	uint32_t	nrrs_fragleft;		/* fragment bytes remaining */
	uint32_t	nrrs_reclen;		/* length of RPC record */
	mbuf_t		nrrs_m;			/* mbufs for current record */
	mbuf_t		nrrs_mlast;
};

int	nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int *, mbuf_t *);
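/*
 * Background for the state above: RPC over TCP frames each message with a
 * 4-byte record mark (RFC 1831) -- the high bit flags the last fragment
 * and the low 31 bits give the fragment length.  A record is one or more
 * fragments; nrrs_markerleft counts down the marker bytes still expected,
 * nrrs_fragleft the fragment payload, and nrrs_reclen the record total.
 */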
/*
 * NFS callback channel socket state
 */
struct nfs_callback_socket
{
	TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
	socket_t			ncbs_so;	/* the socket */
	struct sockaddr_in		ncbs_sin;	/* socket address */
	struct nfs_rpc_record_state	ncbs_rrs;	/* RPC record parsing state */
	time_t				ncbs_stamp;	/* last accessed at */
	uint32_t			ncbs_flags;	/* see below */
};
#define NCBSOCK_UPCALL		0x0001
#define NCBSOCK_UPCALLWANT	0x0002
#define NCBSOCK_DEAD		0x0004
/*
 * NFS callback channel state
 *
 * One listening socket for accepting socket connections from servers and
 * a list of connected sockets to handle callback requests on.
 * Mounts registered with the callback channel are assigned IDs and
 * put on a list so that the callback request handling code can match
 * the requests up with mounts.
 */
socket_t nfs4_cb_so = NULL;
in_port_t nfs4_cb_port = 0;
uint32_t nfs4_cb_id = 0;
uint32_t nfs4_cb_so_usecount = 0;
TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;

int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
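/*
 * (In effect all NFSv4 mounts share the one listening socket; each mount
 * is handed a nonzero nm_cbid at setup, the server echoes that ID in its
 * CB_COMPOUND requests, and nfs4_cb_handler uses it to find the mount.)
 */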
/*
 * Set up the callback channel for the NFS mount.
 *
 * Initializes the callback channel socket state and
 * assigns a callback ID to the mount.
 */
void
nfs4_mount_callback_setup(struct nfsmount *nmp)
{
	struct sockaddr_in sin;
	socket_t so = NULL;
	struct timeval timeo;
	int error, on = 1;

	lck_mtx_lock(nfs_global_mutex);
	if (nfs4_cb_id == 0) {
		TAILQ_INIT(&nfs4_cb_mounts);
		TAILQ_INIT(&nfs4_cb_socks);
		nfs4_cb_id++;
	}
	nmp->nm_cbid = nfs4_cb_id++;
	if (nmp->nm_cbid == 0)
		nmp->nm_cbid = nfs4_cb_id++;
	nfs4_cb_so_usecount++;
	TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);

	if (nfs4_cb_so) {
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}

	error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d creating listening socket\n", error);
		goto fail;
	}
	so = nfs4_cb_so;

	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = 0;
	error = sock_bind(so, (struct sockaddr *)&sin);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d binding listening socket\n", error);
		goto fail;
	}
	error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d getting listening socket port\n", error);
		goto fail;
	}
	nfs4_cb_port = ntohs(sin.sin_port);

	error = sock_listen(so, 32);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d on listen\n", error);
		goto fail;
	}

	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting socket rx timeout\n", error);
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting socket tx timeout\n", error);
	sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	error = 0;

fail:
	if (error) {
		nfs4_cb_so = NULL;
		nfs4_cb_port = 0;
		lck_mtx_unlock(nfs_global_mutex);
		if (so) {
			sock_shutdown(so, SHUT_RDWR);
			sock_close(so);
		}
	} else {
		lck_mtx_unlock(nfs_global_mutex);
	}
}
/*
 * Shut down the callback channel for the NFS mount.
 *
 * Clears the mount's callback ID and releases the mounts
 * reference on the callback socket.  Last reference dropped
 * will also shut down the callback socket(s).
 */
void
nfs4_mount_callback_shutdown(struct nfsmount *nmp)
{
	struct nfs_callback_socket *ncbsp;
	socket_t so;
	struct nfs4_cb_sock_list cb_socks;
	struct timespec ts = {1,0};

	lck_mtx_lock(nfs_global_mutex);
	TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
	/* wait for any callbacks in progress to complete */
	while (nmp->nm_cbrefs)
		msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
	if (--nfs4_cb_so_usecount) {
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	so = nfs4_cb_so;
	nfs4_cb_so = NULL;
	TAILQ_INIT(&cb_socks);
	TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
	lck_mtx_unlock(nfs_global_mutex);
	if (so) {
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	}
	while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
		TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		FREE(ncbsp, M_TEMP);
	}
}
/*
 * Check periodically for stale/unused nfs callback sockets
 */
#define NFS4_CB_TIMER_PERIOD	30
#define NFS4_CB_IDLE_MAX	300
void
nfs4_callback_timer(__unused void *param0, __unused void *param1)
{
	struct nfs_callback_socket *ncbsp, *nextncbsp;
	struct timeval now;

loop:
	lck_mtx_lock(nfs_global_mutex);
	if (TAILQ_EMPTY(&nfs4_cb_socks)) {
		nfs4_callback_timer_on = 0;
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	microuptime(&now);
	TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
		if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
		     (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
			continue;
		TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
		lck_mtx_unlock(nfs_global_mutex);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		FREE(ncbsp, M_TEMP);
		goto loop;
	}
	nfs4_callback_timer_on = 1;
	nfs_interval_timer_start(nfs4_callback_timer_call,
		NFS4_CB_TIMER_PERIOD * 1000);
	lck_mtx_unlock(nfs_global_mutex);
}
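/*
 * (nfs4_callback_timer_on doubles as a little state machine: 0 = timer
 * idle, 1 = armed for the normal NFS4_CB_TIMER_PERIOD, 2 = rearmed on
 * the short 500ms fuse used when an unwanted socket awaits reaping --
 * see nfs4_cb_accept below.)
 */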
/*
 * Accept a new callback socket.
 */
void
nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
{
	socket_t newso = NULL;
	struct nfs_callback_socket *ncbsp;
	struct nfsmount *nmp;
	struct timeval timeo, now;
	struct sockaddr_in *saddr;
	int error, on = 1;

	if (so != nfs4_cb_so)
		return;

	/* allocate/initialize a new nfs_callback_socket */
	MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
	if (!ncbsp) {
		log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
		return;
	}
	bzero(ncbsp, sizeof(*ncbsp));
	ncbsp->ncbs_sin.sin_len = sizeof(struct sockaddr_in);
	ncbsp->ncbs_rrs.nrrs_markerleft = sizeof(ncbsp->ncbs_rrs.nrrs_fragleft);

	/* accept a new socket */
	error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_sin,
			ncbsp->ncbs_sin.sin_len, MSG_DONTWAIT,
			nfs4_cb_rcv, ncbsp, &newso);
	if (error) {
		log(LOG_INFO, "nfs callback accept: error %d accepting socket\n", error);
		FREE(ncbsp, M_TEMP);
		return;
	}

	/* set up the new socket */
	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting socket rx timeout\n", error);
	error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting socket tx timeout\n", error);
	sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	ncbsp->ncbs_so = newso;
	microuptime(&now);
	ncbsp->ncbs_stamp = now.tv_sec;

	lck_mtx_lock(nfs_global_mutex);

	/* add it to the list */
	TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);

	/* verify it's from a host we have mounted */
	TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
		/* check socket's source address matches this mount's server address */
		saddr = mbuf_data(nmp->nm_nam);
		if ((ncbsp->ncbs_sin.sin_len == saddr->sin_len) &&
		    (ncbsp->ncbs_sin.sin_family == saddr->sin_family) &&
		    (ncbsp->ncbs_sin.sin_addr.s_addr == saddr->sin_addr.s_addr))
			break;
	}
	if (!nmp) /* we don't want this socket, mark it dead */
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;

	/* make sure the callback socket cleanup timer is running */
	/* (shorten the timer if we've got a socket we don't want) */
	if (!nfs4_callback_timer_on) {
		nfs4_callback_timer_on = 1;
		nfs_interval_timer_start(nfs4_callback_timer_call,
			!nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
	} else if (!nmp && (nfs4_callback_timer_on < 2)) {
		nfs4_callback_timer_on = 2;
		thread_call_cancel(nfs4_callback_timer_call);
		nfs_interval_timer_start(nfs4_callback_timer_call, 500);
	}

	lck_mtx_unlock(nfs_global_mutex);
}
/*
 * Receive mbufs from callback sockets into RPC records and process each record.
 * Detect connection has been closed and shut down.
 */
void
nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_callback_socket *ncbsp = arg;
	struct timespec ts = {1,0};
	struct timeval now;
	mbuf_t m;
	int error = 0, recv = 1;

	lck_mtx_lock(nfs_global_mutex);
	while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
		/* wait if upcall is already in progress */
		ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
		msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
	}
	ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, &recv, &m);
		if (m) /* handle the request */
			error = nfs4_cb_handler(ncbsp, m);
	}

	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/*
		 * Socket is either being closed or should be.
		 * We can't close the socket in the context of the upcall.
		 * So we mark it as dead and leave it for the cleanup timer to reap.
		 */
		ncbsp->ncbs_stamp = 0;
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;
	} else {
		microuptime(&now);
		ncbsp->ncbs_stamp = now.tv_sec;
	}

	lck_mtx_lock(nfs_global_mutex);
	ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);
	wakeup(ncbsp);
}
/*
 * Handle an NFS callback channel request.
 */
int
nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
{
	socket_t so = ncbsp->ncbs_so;
	struct nfsm_chain nmreq, nmrep;
	mbuf_t mhead = NULL, mrest = NULL, m;
	struct msghdr msg;
	struct sockaddr_in *saddr;
	struct nfsmount *nmp;
	fhandle_t fh;
	nfsnode_t np;
	nfs_stateid stateid;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
	uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
	uint32_t auth_type, auth_len;
	uint32_t numres, *pnumres;
	int error = 0, replen, len;
	size_t sentlen = 0;

	xid = numops = op = status = procnum = taglen = cbid = 0;

	nfsm_chain_dissect_init(error, &nmreq, mreq);
	nfsm_chain_get_32(error, &nmreq, xid);		// RPC XID
	nfsm_chain_get_32(error, &nmreq, val);		// RPC Call
	nfsm_assert(error, (val == RPC_CALL), EBADRPC);
	nfsm_chain_get_32(error, &nmreq, val);		// RPC Version
	nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
	nfsm_chain_get_32(error, &nmreq, val);		// RPC Program Number
	nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
	nfsm_chain_get_32(error, &nmreq, val);		// NFS Callback Program Version Number
	nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
	nfsm_chain_get_32(error, &nmreq, procnum);	// NFS Callback Procedure Number
	nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);

	/* Handle authentication */
	/* XXX just ignore auth for now - handling kerberos may be tricky */
	nfsm_chain_get_32(error, &nmreq, auth_type);	// RPC Auth Flavor
	nfsm_chain_get_32(error, &nmreq, auth_len);	// RPC Auth Length
	nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
	if (!error && (auth_len > 0))
		nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
	nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
	nfsm_chain_get_32(error, &nmreq, auth_len);	// verifier length
	nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
	if (!error && (auth_len > 0))
		nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
	if (error) {
		status = error;
		error = 0;
		goto nfsmout;
	}

	switch (procnum) {
	case NFSPROC4_CB_NULL:
		status = NFSERR_RETVOID;
		break;
	case NFSPROC4_CB_COMPOUND:
		/* tag, minorversion, cb ident, numops, op array */
		nfsm_chain_get_32(error, &nmreq, taglen);	/* tag length */
		nfsm_assert(error, (taglen <= NFS4_OPAQUE_LIMIT), EBADRPC);

		/* start building the body of the response */
		nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
		nfsm_chain_init(&nmrep, mrest);

		/* copy tag from request to response */
		nfsm_chain_add_32(error, &nmrep, taglen);	/* tag length */
		for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
			nfsm_chain_get_32(error, &nmreq, val);
			nfsm_chain_add_32(error, &nmrep, val);
		}

		/* insert number of results placeholder */
		numres = 0;
		nfsm_chain_add_32(error, &nmrep, numres);
		pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);

		nfsm_chain_get_32(error, &nmreq, val);		/* minorversion */
		nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
		nfsm_chain_get_32(error, &nmreq, cbid);		/* callback ID */
		nfsm_chain_get_32(error, &nmreq, numops);	/* number of operations */
		if (error) {
			if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
				status = error;
			else if ((error == ENOBUFS) || (error == ENOMEM))
				status = NFSERR_RESOURCE;
			else
				status = NFSERR_SERVERFAULT;
			error = 0;
			nfsm_chain_null(&nmrep);
			goto nfsmout;
		}

		/* match the callback ID to a registered mount */
		lck_mtx_lock(nfs_global_mutex);
		TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
			if (nmp->nm_cbid != cbid)
				continue;
			/* verify socket's source address matches this mount's server address */
			saddr = mbuf_data(nmp->nm_nam);
			if ((ncbsp->ncbs_sin.sin_len != saddr->sin_len) ||
			    (ncbsp->ncbs_sin.sin_family != saddr->sin_family) ||
			    (ncbsp->ncbs_sin.sin_addr.s_addr != saddr->sin_addr.s_addr))
				continue;
			break;
		}
		/* mark the NFS mount as busy */
		if (nmp)
			nmp->nm_cbrefs++;
		lck_mtx_unlock(nfs_global_mutex);
		if (!nmp) {
			/* if no mount match, just drop socket. */
			error = EPERM;
			nfsm_chain_null(&nmrep);
			goto out;
		}

		/* process ops, adding results to mrest */
		while (numops > 0) {
			numops--;
			nfsm_chain_get_32(error, &nmreq, op);
			if (error)
				break;
			switch (op) {
			case NFS_OP_CB_GETATTR:
				// (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
				np = NULL;
				nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
				bmlen = NFS_ATTR_BITMAP_LEN;
				nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
				if (error) {
					status = error;
					error = 0;
					numops = 0;	/* don't process any more ops */
				} else {
					/* find the node for the file handle */
					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
					if (error || !np) {
						status = NFSERR_BADHANDLE;
						error = 0;
						np = NULL;
						numops = 0;	/* don't process any more ops */
					}
				}
				nfsm_chain_add_32(error, &nmrep, op);
				nfsm_chain_add_32(error, &nmrep, status);
				if (!error && (status == EBADRPC))
					error = status;
				if (np) {
					/* only allow returning size, change, and mtime attrs */
					NFS_CLEAR_ATTRIBUTES(&rbitmap);
					attrbytes = 0;
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
						attrbytes += 2 * NFSX_UNSIGNED;
					}
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
						attrbytes += 2 * NFSX_UNSIGNED;
					}
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
						attrbytes += 3 * NFSX_UNSIGNED;
					}
					nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
					nfsm_chain_add_32(error, &nmrep, attrbytes);
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
						nfsm_chain_add_64(error, &nmrep,
							np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
						nfsm_chain_add_64(error, &nmrep, np->n_size);
					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
						nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
						nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
					}
					nfs_node_unlock(np);
					vnode_put(NFSTOV(np));
				}
				/*
				 * If we hit an error building the reply, we can't easily back up.
				 * So we'll just update the status and hope the server ignores the
				 * extra garbage.
				 */
				break;
			case NFS_OP_CB_RECALL:
				// (STATEID, TRUNCATE, FH) -> (STATUS)
				np = NULL;
				nfsm_chain_get_stateid(error, &nmreq, &stateid);
				nfsm_chain_get_32(error, &nmreq, truncate);
				nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
				if (error) {
					status = error;
					error = 0;
					numops = 0;	/* don't process any more ops */
				} else {
					/* find the node for the file handle */
					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
					if (error || !np) {
						status = NFSERR_BADHANDLE;
						error = 0;
						np = NULL;
						numops = 0;	/* don't process any more ops */
					} else if (!(np->n_openflags & N_DELEG_MASK) ||
					    bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
						/* delegation stateid state doesn't match */
						status = NFSERR_BAD_STATEID;
						numops = 0;	/* don't process any more ops */
					}
					if (!status) {
						/* add node to recall queue, and wake socket thread */
						lck_mtx_lock(&nmp->nm_lock);
						if (np->n_dlink.tqe_next == NFSNOLIST)
							TAILQ_INSERT_TAIL(&nmp->nm_recallq, np, n_dlink);
						nfs_mount_sock_thread_wake(nmp);
						lck_mtx_unlock(&nmp->nm_lock);
					}
					if (np) {
						nfs_node_unlock(np);
						vnode_put(NFSTOV(np));
					}
				}
				nfsm_chain_add_32(error, &nmrep, op);
				nfsm_chain_add_32(error, &nmrep, status);
				if (!error && (status == EBADRPC))
					error = status;
				break;
			case NFS_OP_CB_ILLEGAL:
			default:
				nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
				status = NFSERR_OP_ILLEGAL;
				nfsm_chain_add_32(error, &nmrep, status);
				numops = 0;	/* don't process any more ops */
				break;
			}
			numres++;
		}

		if (!status && error) {
			if (error == EBADRPC)
				status = error;
			else if ((error == ENOBUFS) || (error == ENOMEM))
				status = NFSERR_RESOURCE;
			else
				status = NFSERR_SERVERFAULT;
			error = 0;
		}

		/* Now, set the numres field */
		*pnumres = txdr_unsigned(numres);
		nfsm_chain_build_done(error, &nmrep);
		nfsm_chain_null(&nmrep);

		/* drop the callback reference on the mount */
		lck_mtx_lock(nfs_global_mutex);
		nmp->nm_cbrefs--;
		if (!nmp->nm_cbrefs)
			wakeup(&nmp->nm_cbrefs);
		lck_mtx_unlock(nfs_global_mutex);
		break;
	}

nfsmout:
	if (status == EBADRPC)
		OSAddAtomic(1, &nfsstats.rpcinvalid);

	/* build reply header */
	error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
	nfsm_chain_init(&nmrep, mhead);
	nfsm_chain_add_32(error, &nmrep, 0);	/* insert space for an RPC record mark */
	nfsm_chain_add_32(error, &nmrep, xid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (status & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		/* XXX RPCAUTH_NULL verifier */
		nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
		nfsm_chain_add_32(error, &nmrep, 0);
		/* accepted status */
		switch (status) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (status != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep, status);
			break;
		}
	}
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		nfsm_chain_null(&nmrep);
		goto out;
	}
	error = mbuf_setnext(nmrep.nmc_mcur, mrest);
	if (error) {
		printf("nfs cb: mbuf_setnext failed %d\n", error);
		goto out;
	}
	mrest = NULL;

	/* Calculate the size of the reply */
	replen = 0;
	for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
		replen += mbuf_len(m);
	mbuf_pkthdr_setlen(mhead, replen);
	error = mbuf_pkthdr_setrcvif(mhead, NULL);
	nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
	nfsm_chain_null(&nmrep);

	/* send the reply */
	bzero(&msg, sizeof(msg));
	error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
	mhead = NULL;
	if (!error && ((int)sentlen != replen))
		error = EWOULDBLOCK;
	if (error == EWOULDBLOCK)	/* inability to send response is considered fatal */
		error = ETIMEDOUT;
out:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	if (mhead)
		mbuf_freem(mhead);
	if (mrest)
		mbuf_freem(mrest);
	return (error);
}
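/*
 * (A small XDR note for the handler above: opaque fields are padded to
 * 4-byte alignment, which is why auth bodies are skipped with
 * nfsm_rndup() and the tag is copied in NFSX_UNSIGNED-sized steps.)
 */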
/*
 * Read the next (marked) RPC record from the socket.
 *
 * *recvp returns if any data was received.
 * *mp returns the next complete RPC record
 */
int
nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp, mbuf_t *mp)
{
	struct iovec aio;
	struct msghdr msg;
	size_t rcvlen;
	int error = 0;
	mbuf_t m;

	*recvp = 0;
	*mp = NULL;

	/* read the TCP RPC record marker */
	while (!error && nrrsp->nrrs_markerleft) {
		aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
			sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
		aio.iov_len = nrrsp->nrrs_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = &aio;
		msg.msg_iovlen = 1;
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		if (error || !rcvlen)
			break;
		*recvp = 1;
		nrrsp->nrrs_markerleft -= rcvlen;
		if (nrrsp->nrrs_markerleft)
			continue;
		/* record marker complete */
		nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
		if (nrrsp->nrrs_fragleft & 0x80000000) {
			nrrsp->nrrs_lastfrag = 1;
			nrrsp->nrrs_fragleft &= ~0x80000000;
		}
		nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
		if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
		m = NULL;
		rcvlen = nrrsp->nrrs_fragleft;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (error || !rcvlen || !m)
			break;
		*recvp = 1;
		/* append mbufs to list */
		nrrsp->nrrs_fragleft -= rcvlen;
		if (!nrrsp->nrrs_m) {
			nrrsp->nrrs_m = m;
		} else {
			error = mbuf_setnext(nrrsp->nrrs_mlast, m);
			if (error) {
				printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nrrsp->nrrs_mlast = m;
	}

	/* done reading fragment? */
	if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
		/* reset socket fragment parsing state */
		nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
		if (nrrsp->nrrs_lastfrag) {
			/* RPC record complete */
			*mp = nrrsp->nrrs_m;
			/* reset socket record parsing state */
			nrrsp->nrrs_reclen = 0;
			nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
			nrrsp->nrrs_lastfrag = 0;
		}
	}

	return (error);
}
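/*
 * (Callers treat EWOULDBLOCK from the reads above as "no more data for
 * now"; *recvp lets them tell that apart from a closed connection, which
 * shows up as no error and no data.)
 */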
/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - wait for reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };
	struct timeval now;

again:
	error = nfs_sndlock(req);
	if (error) {
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	/*
	 * If it's a setup RPC but we're not in SETUP... must need reconnect.
	 * If it's a recovery RPC but the socket's not ready... must need reconnect.
	 */
	if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
	    ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
		error = ETIMEDOUT;
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			if ((nmp->nm_flag & NFSMNT_SOFT) && (nmp->nm_reconnect_start > 0)) {
				microuptime(&now);
				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
					/* soft mount in reconnect for a while... terminate ASAP */
					OSAddAtomic(1, &nfsstats.rpctimeouts);
					req->r_flags |= R_SOFTTERM;
					req->r_error = error = ETIMEDOUT;
					break;
				}
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error) {
			lck_mtx_lock(&req->r_mtx);
			req->r_error = error;
			req->r_flags &= ~R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			return (error);
		}
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_flags &= ~R_SENDING;
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, &nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}
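/*
 * (Per the contract in the comment above nfs_send, most transport errors
 * are deliberately swallowed here: the request stays marked R_MUSTRESEND
 * for the socket thread or timer to retry, and only termination-worthy
 * errors -- EINTR, ERESTART, EIO, ENXIO, ETIMEDOUT -- reach the caller.)
 */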
/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
/* NFS client TCP socket upcall */
void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = ((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
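
/*
 * For reference, the record marker parsed above is the 4-byte, big-endian
 * framing word defined by RFC 1831 record marking: the high bit flags the
 * last fragment of a record and the low 31 bits give the fragment length.
 * A minimal, self-contained sketch of the decode (illustration only, not
 * part of the original source; names here are hypothetical):
 */
#if 0	/* illustration only */
static void
rm_decode_example(uint32_t marker_be, int *lastfrag, uint32_t *fraglen)
{
	uint32_t marker = ntohl(marker_be);	/* wire order -> host order */
	*lastfrag = (marker & 0x80000000) != 0;	/* last-fragment flag */
	*fraglen = marker & ~0x80000000;	/* 31-bit fragment length */
}
#endif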
2068 * "poke" a socket to try to provoke any pending errors
2071 nfs_sock_poke(struct nfsmount
*nmp
)
2079 lck_mtx_lock(&nmp
->nm_lock
);
2080 if ((nmp
->nm_sockflags
& NMSOCK_UNMOUNT
) || !nmp
->nm_so
) {
2081 lck_mtx_unlock(&nmp
->nm_lock
);
2084 lck_mtx_unlock(&nmp
->nm_lock
);
2085 aio
.iov_base
= &dummy
;
2088 bzero(&msg
, sizeof(msg
));
2091 error
= sock_send(nmp
->nm_so
, &msg
, MSG_DONTWAIT
, &len
);
2092 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error
));
/*
 * Match an RPC reply with the corresponding request
 */
void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_int32_t reply = 0, rxid = 0;
	int error = 0, asyncioq, t1;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, &nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
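		/*
		 * Worked example of the smoothing above (values are
		 * illustrative, not from the original source): the scaled
		 * mean NFS_SRTT carries the mean times 8, so with
		 * NFS_SRTT(req) == 16 (mean of 2 ticks) and a new sample
		 * r_rtt+1 == 3 ticks, t1 = 3 - (16 >> 3) = 1 and the scaled
		 * mean rises by 1.  The deviation is then updated the same
		 * way from |t1| with a gain of 1/4 (the >> 2).
		 */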
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if (req->r_gss_ctx != NULL)
			nfs_gss_clnt_rpcdone(req);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, &nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}
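
/*
 * Illustration of the additive-increase step above (not part of the
 * original source): nm_cwnd is kept in NFS_CWNDSCALE units, so adding
 * (NFS_CWNDSCALE*NFS_CWNDSCALE + nm_cwnd/2) / nm_cwnd per reply grows the
 * window by roughly one request per round trip.  A self-contained sketch
 * of that step, with the rounding term:
 */
#if 0	/* illustration only */
static uint32_t
cwnd_additive_increase_example(uint32_t cwnd, uint32_t scale, uint32_t maxcwnd)
{
	/* same formula as the SOCK_DGRAM branch above */
	cwnd += ((scale * scale) + (cwnd >> 1)) / cwnd;
	if (cwnd > maxcwnd)
		cwnd = maxcwnd;
	return (cwnd);
}
#endif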
/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
int
nfs_wait_reply(struct nfsreq *req)
{
	struct timespec ts = { 2, 0 };
	int error = 0, slpflag;

	if (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			req->r_flags |= R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				lck_mtx_lock(&req->r_mtx);
				req->r_flags &= ~R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}
/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
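
/*
 * For illustration, a caller-side sketch of that sequence (this is
 * essentially what nfs_request2() below does; error handling trimmed):
 *
 *	error = nfs_request_create(np, mp, &nmreq, procnum, thd, cred, &req);
 *	do {
 *		req->r_flags &= ~R_RESTART;
 *		error = nfs_request_add_header(req);
 *		error = nfs_request_send(req, 1);
 *		nfs_request_wait(req);
 *		error = nfs_request_finish(req, &nmrep, &status);
 *	} while (req->r_flags & R_RESTART);
 *	nfs_request_rele(req);
 */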
/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, &nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}
/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };
	int clearjbtimeo = 0;

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	lck_mtx_lock(&req->r_mtx);
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		if (req->r_flags & R_JBTPRINTFMSG) {
			req->r_flags &= ~R_JBTPRINTFMSG;
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (clearjbtimeo)
		nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}
/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	default: /* RPCSEC_GSS flavors */
		auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}
/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(&req->r_mtx);
	req->r_flags |= R_SENDING;
	lck_mtx_unlock(&req->r_mtx);

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
		    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, &nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}
/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth, clearjbtimeo;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if ((req->r_flags & R_CWND) && nmp) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			if (mrep)
				mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;
			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	default:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
				if (!(req->r_flags & R_JBTPRINTFMSG)) {
					req->r_flags |= R_JBTPRINTFMSG;
					lck_mtx_lock(&nmp->nm_lock);
					nmp->nm_jbreqs++;
					lck_mtx_unlock(&nmp->nm_lock);
				}
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)");
			}
			if ((nmp->nm_flag & NFSMNT_SOFT) && (req->r_delay == 30)) {
				/* for soft mounts, just give up after a short while */
				OSAddAtomic(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						goto nfsmout;
					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
				} while (--delay > 0);
			}
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}

		if (req->r_flags & R_JBTPRINTFMSG) {
			req->r_flags &= ~R_JBTPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
		}

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np)
			cache_purge(NFSTOV(req->r_np));
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (req->r_flags & R_JBTPRINTFMSG) {
		req->r_flags &= ~R_JBTPRINTFMSG;
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_jbreqs--;
		clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		lck_mtx_unlock(&nmp->nm_lock);
		if (clearjbtimeo)
			nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	}
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}
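
/*
 * Note on the jukebox backoff above: r_delay starts at NFS_TRYLATERDEL
 * and doubles on each NFSERR_TRYLATER reply, clipped to 30 seconds, so
 * successive delays run NFS_TRYLATERDEL, 2x, 4x, ... 30, 30, ...  A soft
 * mount gives up once the delay reaches the 30 second clip.
 */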
/*
 * Perform an NFS request synchronously.
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		0, nmrepp, xidp, status);
}

int
nfs_request2(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Set up a new null proc request to exchange GSS context tokens with the
 * server. Associate the context that we are setting up with the request that we
 * are sending.
 */
int
nfs_request_gss(
	mount_t mp,
	struct nfsm_chain *nmrest,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfs_gss_clnt_ctx *cp,	/* Set to gss context to renew or setup */
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	if (cp == NULL) {
		printf("nfs_request_gss request has no context\n");
		nfs_request_rele(req);
		return (NFSERR_EAUTH);
	}
	nfs_gss_clnt_ctx_ref(req, cp);

	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Create and start an asynchronous NFS request.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	int error, sent = 0;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= R_ASYNC;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
/*
 * Wait for and finish an asynchronous NFS request.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}
/*
 * Flag a request as being terminated.
 */
void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;

	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}
/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}
/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 */
void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}
/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 */
struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}
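
/*
 * Illustration of the hand-over-hand traversal this enables (this is the
 * pattern the request timer below uses; sketch only, not part of the
 * original source):
 */
#if 0	/* illustration only */
static void
nfs_reqq_walk_example(void)
{
	struct nfsreq *req;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req != NULL)
		nfs_reqbusy(req);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		/* inspect req: it can't be freed while RL_BUSY is set */
	}
	lck_mtx_unlock(nfs_request_mutex);
}
#endif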
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!(nmp->nm_flag & NFSMNT_SOFT))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;

				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			nfs_mount_check_dead_timeout(nmp);
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & NMSOCK_POKE)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else
				lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		if (!(nmp->nm_state & NFSSTA_MOUNTED))
			wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}
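
/*
 * Sketch of the retransmit timeout computed above (illustrative helper
 * only, not part of the original source): the base RTO is doubled until
 * 16*timeo >= hz (the 62.5 ms floor), scaled by the per-mount backoff
 * table entry, then clipped to maxtime.
 */
#if 0	/* illustration only */
static int
rto_example(int timeo, int hz, int backoff, int maxtime)
{
	while (16 * timeo < hz)	/* ensure 62.5 ms floor */
		timeo *= 2;
	timeo *= backoff;	/* e.g. nfs_backoff[nm_timeouts - 1] */
	if (timeo > maxtime)	/* limit timeout to max */
		timeo = maxtime;
	return (timeo);
}
#endif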
/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}
/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptible, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	proc_t p;
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */

	/*
	 * If we're in the process of a force unmount and there's
	 * been a timeout, we're dead and fail IO.
	 */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & NFSSTA_FORCE) &&
	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		nmp->nm_flag |= NFSMNT_SOFT;
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/* Check if the mount is marked dead. */
	if (!error && (nmp->nm_state & NFSSTA_DEAD))
		error = ENXIO;

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/* If this thread belongs to kernel task; then abort check is not needed */
	if ((current_proc() != kernproc) && current_thread_aborted())
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if ((nmp->nm_flag & NFSMNT_INT) && ((p = get_bsdthreadtask_info(thd))) &&
	    proc_pendingsignals(p, NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~NFSSTA_SNDLOCK;
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}
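
/*
 * Typical usage of the pair above (illustrative sketch only; this is the
 * shape of the send path in this file, not a verbatim excerpt):
 */
#if 0	/* illustration only */
static int
nfs_send_locked_example(struct nfsreq *req)
{
	int error;

	if ((error = nfs_sndlock(req)))
		return (error);
	/* ... transmit the request on the mount's socket ... */
	nfs_sndunlock(req);
	return (0);
}
#endif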
int
nfs_aux_request(
	struct nfsmount *nmp,
	thread_t thd,
	struct sockaddr_in *saddr,
	mbuf_t mreq,
	uint32_t xid,
	int bindresv,
	int timeo,
	struct nfsm_chain *nmrep)
{
	int error = 0, on = 1, try, sendat = 2;
	socket_t so = NULL;
	struct sockaddr_in sin;
	struct timeval tv = { 1, 0 };
	mbuf_t m, mrep = NULL;
	struct msghdr msg;
	uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
	uint32_t verf_type, verf_len, accepted_status;
	size_t readlen;

	/* create socket and set options */
	if (((error = sock_socket(saddr->sin_family, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, &so))) ||
	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
		goto nfsmout;
	if (bindresv) {
		int portrange = IP_PORTRANGE_LOW;
		error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
		nfsmout_if(error);
		/* bind now to check for failure */
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		sin.sin_port = 0;
		error = sock_bind(so, (struct sockaddr *) &sin);
		nfsmout_if(error);
	}

	for (try=0; try < timeo; try++) {
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			break;
		if (!try || (try == sendat)) {
			/* send the request (resending periodically) */
			if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
				goto nfsmout;
			bzero(&msg, sizeof(msg));
			msg.msg_name = saddr;
			msg.msg_namelen = saddr->sin_len;
			if ((error = sock_sendmbuf(so, &msg, m, 0, NULL)))
				goto nfsmout;
			sendat *= 2;
		}
		/* wait for the response */
		readlen = 1<<18;
		bzero(&msg, sizeof(msg));
		error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
		if (error == EWOULDBLOCK)
			continue;
		nfsmout_if(error);
		/* parse the response */
		nfsm_chain_dissect_init(error, nmrep, mrep);
		nfsm_chain_get_32(error, nmrep, rxid);
		nfsm_chain_get_32(error, nmrep, reply);
		nfsmout_if(error);
		if ((rxid != xid) || (reply != RPC_REPLY))
			error = EBADRPC;
		nfsm_chain_get_32(error, nmrep, reply_status);
		nfsmout_if(error);
		if (reply_status == RPC_MSGDENIED) {
			nfsm_chain_get_32(error, nmrep, rejected_status);
			nfsmout_if(error);
			error = (rejected_status == RPC_MISMATCH) ? ENOTSUP : EACCES;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
		nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
		nfsmout_if(error);
		if (verf_len)
			nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, nmrep, accepted_status);
		nfsm_assert(error, (accepted_status == RPC_SUCCESS), EIO);
		break;
	}
nfsmout:
	if (so) {
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	}
	mbuf_freem(mreq);
	return (error);
}
int
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}
void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	uint32_t do_vfs_signal;
	struct timeval now;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	if (unresponsive && (nmp->nm_flag & NFSMNT_DEADTIMEOUT)) {
		microuptime(&now);
		if (!wasunresponsive) {
			nmp->nm_deadto_start = now.tv_sec;
			nfs_mount_sock_thread_wake(nmp);
		} else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_deadtimeout) {
			if (!(nmp->nm_state & NFSSTA_DEAD))
				printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			nmp->nm_state |= NFSSTA_DEAD;
		}
	}
	lck_mtx_unlock(&nmp->nm_lock);

	if (nmp->nm_state & NFSSTA_DEAD)
		do_vfs_signal = VQ_DEAD;
	else if (softnobrowse || wasunresponsive || !unresponsive)
		do_vfs_signal = 0;
	else
		do_vfs_signal = VQ_NOTRESP;
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}
void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	int do_vfs_signal;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	if (nmp->nm_deadto_start)
		nmp->nm_deadto_start = 0;
	lck_mtx_unlock(&nmp->nm_lock);

	if (softnobrowse)
		do_vfs_signal = 0;
	else
		do_vfs_signal = (wasunresponsive && !unresponsive);
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}

#endif /* NFSCLIENT */
#if NFSSERVER

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_int32_t *tl;
	struct nfsm_chain nmrep;
	int err, error = 0;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize)
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	else
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);  /* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, &nfsstats.srvrpc_errs);
	return (0);
}
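
/*
 * For reference, the accepted-reply header built above lays out as a
 * sequence of XDR words (RFC 1831): xid, REPLY, MSG_ACCEPTED, verifier
 * flavor, verifier length (plus any opaque verifier body), the accept
 * status, then any status-specific results (e.g. the low/high supported
 * versions for PROG_MISMATCH) followed by the NFS status word.
 */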
/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;  /* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
	    error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an(other) nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && (waitflag == MBUF_DONTWAIT)) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		do {
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid);	// XID
	nfsm_chain_get_32(error, nmreq, val);		// RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val);	// RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val);	// RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type);	// Auth Flavor
	nfsm_chain_get_32(error, nmreq, len);		// Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
		nfsmout_if(error);

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}
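
/*
 * For reference, the AUTH_UNIX credential body parsed above is, in XDR
 * words (RFC 1057): stamp, machinename<255>, uid, gid, then up to 16
 * extra gids (RPCAUTH_UNIXGIDS), followed by the verifier flavor and
 * length (expected to be AUTH_NONE with a zero-length body).
 */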
/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */