2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1989, 1991, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
69 * Socket operations for use by nfs
72 #include <sys/param.h>
73 #include <sys/systm.h>
75 #include <sys/signalvar.h>
76 #include <sys/kauth.h>
77 #include <sys/mount_internal.h>
78 #include <sys/kernel.h>
79 #include <sys/kpi_mbuf.h>
80 #include <sys/malloc.h>
81 #include <sys/vnode.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/syslog.h>
86 #include <sys/tprintf.h>
87 #include <libkern/OSAtomic.h>
90 #include <kern/clock.h>
91 #include <kern/task.h>
92 #include <kern/thread.h>
93 #include <kern/thread_call.h>
97 #include <netinet/in.h>
98 #include <netinet/tcp.h>
100 #include <nfs/rpcv2.h>
101 #include <nfs/krpc.h>
102 #include <nfs/nfsproto.h>
104 #include <nfs/xdr_subs.h>
105 #include <nfs/nfsm_subs.h>
106 #include <nfs/nfs_gss.h>
107 #include <nfs/nfsmount.h>
108 #include <nfs/nfsnode.h>
111 boolean_t
current_thread_aborted(void);
112 kern_return_t
thread_terminate(thread_t
);
116 int nfsrv_sock_max_rec_queue_length
= 128; /* max # RPC records queued on (UDP) socket */
118 int nfsrv_getstream(struct nfsrv_sock
*,int);
119 int nfsrv_getreq(struct nfsrv_descript
*);
120 extern int nfsv3_procid
[NFS_NPROCS
];
121 #endif /* NFSSERVER */
124 * compare two sockaddr structures
/*
 * nfs_sockaddr_cmp: order two socket addresses.
 *
 * sa1, sa2: the addresses to compare.
 *
 * Visible logic: a differing sa_family, and then a differing sa_len,
 * yields -1 or 1 according to which side is smaller. For AF_INET and
 * AF_INET6 the result is the bcmp() of the two address fields only
 * (the port fields are not examined here).
 *
 * NOTE(review): this listing is missing lines (the return type, the
 * braces, and whatever precedes the family check and follows the
 * AF_INET6 case). Confirm against the full source before relying on
 * this summary.
 */
127 nfs_sockaddr_cmp(struct sockaddr
*sa1
, struct sockaddr
*sa2
)
/* different address families: order by the family value */
133 if (sa1
->sa_family
!= sa2
->sa_family
)
134 return ((sa1
->sa_family
< sa2
->sa_family
) ? -1 : 1);
/* same family but different lengths: order by length */
135 if (sa1
->sa_len
!= sa2
->sa_len
)
136 return ((sa1
->sa_len
< sa2
->sa_len
) ? -1 : 1);
/* IPv4: compare the sin_addr bytes */
137 if (sa1
->sa_family
== AF_INET
)
138 return (bcmp(&((struct sockaddr_in
*)sa1
)->sin_addr
,
139 &((struct sockaddr_in
*)sa2
)->sin_addr
, sizeof(((struct sockaddr_in
*)sa1
)->sin_addr
)));
/* IPv6: compare the sin6_addr bytes */
140 if (sa1
->sa_family
== AF_INET6
)
141 return (bcmp(&((struct sockaddr_in6
*)sa1
)->sin6_addr
,
142 &((struct sockaddr_in6
*)sa2
)->sin6_addr
, sizeof(((struct sockaddr_in6
*)sa1
)->sin6_addr
)));
148 int nfs_reconnect(struct nfsmount
*);
149 int nfs_connect_setup(struct nfsmount
*);
150 void nfs_mount_sock_thread(void *, wait_result_t
);
151 void nfs_udp_rcv(socket_t
, void*, int);
152 void nfs_tcp_rcv(socket_t
, void*, int);
153 void nfs_sock_poke(struct nfsmount
*);
154 void nfs_request_match_reply(struct nfsmount
*, mbuf_t
);
155 void nfs_reqdequeue(struct nfsreq
*);
156 void nfs_reqbusy(struct nfsreq
*);
157 struct nfsreq
*nfs_reqnext(struct nfsreq
*);
158 int nfs_wait_reply(struct nfsreq
*);
159 void nfs_softterm(struct nfsreq
*);
160 int nfs_can_squish(struct nfsmount
*);
161 int nfs_is_squishy(struct nfsmount
*);
162 int nfs_is_dead(int, struct nfsmount
*);
164 #ifdef NFS_SOCKET_DEBUGGING
165 #define NFS_SOCK_DBG(X) printf X
167 #define NFS_SOCK_DBG(X)
171 * Estimate rto for an nfs rpc sent via an unreliable datagram.
172 * Use the mean and mean deviation of rtt for the appropriate type of rpc
173 * for the frequent rpcs and a default for the others.
174 * The justification for doing "other" this way is that these rpcs
175 * happen so infrequently that timer est. would probably be stale.
176 * Also, since many of these rpcs are
177 * non-idempotent, a conservative timeout is desired.
178 * getattr, lookup - A+2D
182 #define NFS_RTO(n, t) \
183 ((t) == 0 ? (n)->nm_timeo : \
185 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
186 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
187 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
188 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
191 * Defines which timer to use for the procnum.
198 static int proct
[NFS_NPROCS
] = {
199 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
203 * There is a congestion window for outstanding rpcs maintained per mount
204 * point. The cwnd size is adjusted in roughly the way that:
205 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
206 * SIGCOMM '88". ACM, August 1988.
207 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
208 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
209 * of rpcs is in progress.
210 * (The sent count and cwnd are scaled for integer arith.)
211 * Variants of "slow start" were tried and were found to be too much of a
212 * performance hit (ave. rtt 3 times larger),
213 * I suspect due to the large rtt that nfs rpcs have.
215 #define NFS_CWNDSCALE 256
216 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
217 static int nfs_backoff
[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
220 * Increment location index to next address/server/location.
/*
 * nfs_location_next: step a location index to the next address.
 *
 * nlp:  the set of fs locations being walked.
 * nlip: in/out index (location / server / address). The visible code
 *       copies its three fields into locals, advances them, and stores
 *       serv and addr back at the end.
 *
 * Per the comments in the body, the walk moves to the next address,
 * rolls over to the first address of the next server when the current
 * server has no more addresses, rolls over to the first server of the
 * next location when a location has no more servers, and wraps from the
 * last location back to location 0. A server without addresses is
 * skipped, and the walk gives up once it arrives back at the index that
 * was passed in.
 *
 * NOTE(review): this listing is missing lines (the increments, the
 * resets, the retry/return statements and any store-back of the
 * location field are not visible). Confirm the details against the
 * full source.
 */
223 nfs_location_next(struct nfs_fs_locations
*nlp
, struct nfs_location_index
*nlip
)
/* work on local copies so the caller index is only updated at the end */
225 uint8_t loc
= nlip
->nli_loc
;
226 uint8_t serv
= nlip
->nli_serv
;
227 uint8_t addr
= nlip
->nli_addr
;
229 /* move to next address */
231 if (addr
>= nlp
->nl_locations
[loc
]->nl_servers
[serv
]->ns_addrcount
) {
232 /* no more addresses on current server, go to first address of next server */
236 if (serv
>= nlp
->nl_locations
[loc
]->nl_servcount
) {
237 /* no more servers on current location, go to first server of next location */
240 if (loc
>= nlp
->nl_numlocs
)
241 loc
= 0; /* after last location, wrap back around to first location */
245 * It's possible for this next server to not have any addresses.
246 * Check for that here and go to the next server.
247 * But bail out if we've managed to come back around to the original
248 * location that was passed in. (That would mean no servers had any
249 * addresses. And we don't want to spin here forever.)
251 if ((loc
== nlip
->nli_loc
) && (serv
== nlip
->nli_serv
) && (addr
== nlip
->nli_addr
))
253 if (addr
>= nlp
->nl_locations
[loc
]->nl_servers
[serv
]->ns_addrcount
)
257 nlip
->nli_serv
= serv
;
258 nlip
->nli_addr
= addr
;
262 * Compare two location indices.
265 nfs_location_index_cmp(struct nfs_location_index
*nlip1
, struct nfs_location_index
*nlip2
)
267 if (nlip1
->nli_loc
!= nlip2
->nli_loc
)
268 return (nlip1
->nli_loc
- nlip2
->nli_loc
);
269 if (nlip1
->nli_serv
!= nlip2
->nli_serv
)
270 return (nlip1
->nli_serv
- nlip2
->nli_serv
);
271 return (nlip1
->nli_addr
- nlip2
->nli_addr
);
275 * Get the mntfromname (or path portion only) for a given location.
/*
 * nfs_location_mntfromname: format the mount-from name for one location.
 *
 * locs:     fs locations to read from.
 * idx:      selects the location (nli_loc) and the server (nli_serv).
 * s, size:  output buffer and its size in bytes.
 * pathonly: NOTE(review): presumably suppresses the server-name prefix
 *           (the comment preceding this function mentions a path-only
 *           form), but the test on it is not visible in this listing.
 *           Confirm against the full source.
 *
 * Visible logic: the server name followed by a colon is written with
 * snprintf(), then each path component is appended with a leading slash
 * for as long as space remains (size > 0). A path with zero components
 * is treated as the root export of the server.
 *
 * NOTE(review): the declarations of p, cnt and i, the pointer and size
 * updates after each snprintf(), and the body of the root-export case
 * are missing from this listing.
 */
278 nfs_location_mntfromname(struct nfs_fs_locations
*locs
, struct nfs_location_index idx
, char *s
, int size
, int pathonly
)
/* the location entry that idx points at */
280 struct nfs_fs_location
*fsl
= locs
->nl_locations
[idx
.nli_loc
];
/* server name prefix */
286 cnt
= snprintf(p
, size
, "%s:", fsl
->nl_servers
[idx
.nli_serv
]->ns_name
);
290 if (fsl
->nl_path
.np_compcount
== 0) {
291 /* mounting root export on server */
298 /* append each server path component */
299 for (i
=0; (size
> 0) && (i
< (int)fsl
->nl_path
.np_compcount
); i
++) {
300 cnt
= snprintf(p
, size
, "/%s", fsl
->nl_path
.np_components
[i
]);
307 * NFS client connect socket upcall.
308 * (Used only during socket connect/search.)
/*
 * nfs_connect_upcall: socket upcall used while a connection search is
 * probing candidate sockets.
 *
 * so:       the socket with activity.
 * arg:      the struct nfs_socket being probed (assigned to nso).
 * waitflag: unused.
 *
 * Visible flow:
 *  - While NSO_CONNECTING is set, only wake the searcher (nso_wake).
 *  - Under nso_lock, ignore the upcall if another upcall is running, the
 *    socket is disconnecting or dead, or no ping is outstanding.
 *  - Otherwise set NSO_UPCALL and receive without blocking (an RPC record
 *    read for SOCK_STREAM, sock_receivembuf() otherwise), dropping the
 *    lock around the receive.
 *  - A reply is matched against nso_pingxid and its RPC status is parsed.
 *    A program version mismatch while no version has been chosen yet
 *    picks a version from the advertised minvers..maxvers range.
 *  - The ping is then finished: NSO_PINGING is cleared, the socket is
 *    marked NSO_DEAD (with nso_error) or NSO_VERIFIED, and the searcher
 *    is woken.
 *  - On exit NSO_UPCALL is cleared. An error other than EWOULDBLOCK, or a
 *    receive that left recv at zero, marks the socket dead (EPIPE when
 *    there is no error code). A thread sleeping on &nso->nso_flags is
 *    woken if NSO_DISCONNECTING is set.
 *
 * NOTE(review): this listing is missing lines (braces, else / break /
 * goto statements, some declarations and conditions). Confirm the exact
 * control flow against the full source.
 */
311 nfs_connect_upcall(socket_t so
, void *arg
, __unused
int waitflag
)
313 struct nfs_socket
*nso
= arg
;
316 int error
= 0, recv
= 1;
/* still connecting: nothing to read yet, just nudge the searcher */
318 if (nso
->nso_flags
& NSO_CONNECTING
) {
319 NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso
));
320 wakeup(nso
->nso_wake
);
324 lck_mtx_lock(&nso
->nso_lock
);
/* skip if an upcall is already active, the socket is going away, or no ping is pending */
325 if ((nso
->nso_flags
& (NSO_UPCALL
|NSO_DISCONNECTING
|NSO_DEAD
)) || !(nso
->nso_flags
& NSO_PINGING
)) {
326 NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso
));
327 lck_mtx_unlock(&nso
->nso_lock
);
330 NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso
));
331 nso
->nso_flags
|= NSO_UPCALL
;
333 /* loop while we make error-free progress */
334 while (!error
&& recv
) {
335 /* make sure we're still interested in this socket */
336 if (nso
->nso_flags
& (NSO_DISCONNECTING
|NSO_DEAD
))
338 lck_mtx_unlock(&nso
->nso_lock
);
340 if (nso
->nso_sotype
== SOCK_STREAM
) {
341 error
= nfs_rpc_record_read(so
, &nso
->nso_rrs
, MSG_DONTWAIT
, &recv
, &m
);
344 error
= sock_receivembuf(so
, NULL
, &m
, MSG_DONTWAIT
, &rcvlen
);
347 lck_mtx_lock(&nso
->nso_lock
);
349 /* match response with request */
350 struct nfsm_chain nmrep
;
351 uint32_t reply
= 0, rxid
= 0, verf_type
, verf_len
;
352 uint32_t reply_status
, rejected_status
, accepted_status
;
/* parse the RPC reply header: xid, message type, reply status */
354 nfsm_chain_dissect_init(error
, &nmrep
, m
);
355 nfsm_chain_get_32(error
, &nmrep
, rxid
);
356 nfsm_chain_get_32(error
, &nmrep
, reply
);
357 if (!error
&& ((reply
!= RPC_REPLY
) || (rxid
!= nso
->nso_pingxid
)))
359 nfsm_chain_get_32(error
, &nmrep
, reply_status
);
360 if (!error
&& (reply_status
== RPC_MSGDENIED
)) {
361 nfsm_chain_get_32(error
, &nmrep
, rejected_status
);
363 error
= (rejected_status
== RPC_MISMATCH
) ? ERPCMISMATCH
: EACCES
;
365 nfsm_chain_get_32(error
, &nmrep
, verf_type
); /* verifier flavor */
366 nfsm_chain_get_32(error
, &nmrep
, verf_len
); /* verifier length */
369 nfsm_chain_adv(error
, &nmrep
, nfsm_rndup(verf_len
));
370 nfsm_chain_get_32(error
, &nmrep
, accepted_status
);
/* version negotiation: only when the search has not fixed a version yet */
372 if ((accepted_status
== RPC_PROGMISMATCH
) && !nso
->nso_version
) {
373 uint32_t minvers
, maxvers
;
374 nfsm_chain_get_32(error
, &nmrep
, minvers
);
375 nfsm_chain_get_32(error
, &nmrep
, maxvers
);
377 if (nso
->nso_protocol
== PMAPPROG
) {
378 if ((minvers
> RPCBVERS4
) || (maxvers
< PMAPVERS
))
379 error
= EPROGMISMATCH
;
380 else if ((nso
->nso_saddr
->sa_family
== AF_INET
) &&
381 (PMAPVERS
>= minvers
) && (PMAPVERS
<= maxvers
))
382 nso
->nso_version
= PMAPVERS
;
383 else if (nso
->nso_saddr
->sa_family
== AF_INET6
) {
384 if ((RPCBVERS4
>= minvers
) && (RPCBVERS4
<= maxvers
))
385 nso
->nso_version
= RPCBVERS4
;
386 else if ((RPCBVERS3
>= minvers
) && (RPCBVERS3
<= maxvers
))
387 nso
->nso_version
= RPCBVERS3
;
389 } else if (nso
->nso_protocol
== NFS_PROG
) {
/* NFS versions are tried in the order v3, v2, v4 */
390 if ((minvers
> NFS_VER4
) || (maxvers
< NFS_VER2
))
391 error
= EPROGMISMATCH
;
392 else if ((NFS_VER3
>= minvers
) && (NFS_VER3
<= maxvers
))
393 nso
->nso_version
= NFS_VER3
;
394 else if ((NFS_VER2
>= minvers
) && (NFS_VER2
<= maxvers
))
395 nso
->nso_version
= NFS_VER2
;
396 else if ((NFS_VER4
>= minvers
) && (NFS_VER4
<= maxvers
))
397 nso
->nso_version
= NFS_VER4
;
/* a usable version was found, so treat the mismatch reply as success */
399 if (!error
&& nso
->nso_version
)
400 accepted_status
= RPC_SUCCESS
;
403 switch (accepted_status
) {
407 case RPC_PROGUNAVAIL
:
408 error
= EPROGUNAVAIL
;
410 case RPC_PROGMISMATCH
:
411 error
= EPROGMISMATCH
;
413 case RPC_PROCUNAVAIL
:
414 error
= EPROCUNAVAIL
;
/* the ping has been answered, one way or the other */
426 nso
->nso_flags
&= ~NSO_PINGING
;
428 nso
->nso_error
= error
;
429 nso
->nso_flags
|= NSO_DEAD
;
431 nso
->nso_flags
|= NSO_VERIFIED
;
434 /* wake up search thread */
435 wakeup(nso
->nso_wake
);
/* upcall finished */
440 nso
->nso_flags
&= ~NSO_UPCALL
;
441 if ((error
!= EWOULDBLOCK
) && (error
|| !recv
)) {
442 /* problems with the socket... */
443 nso
->nso_error
= error
? error
: EPIPE
;
444 nso
->nso_flags
|= NSO_DEAD
;
445 wakeup(nso
->nso_wake
);
/* a thread waiting for this upcall to finish sleeps on &nso->nso_flags */
447 if (nso
->nso_flags
& NSO_DISCONNECTING
)
448 wakeup(&nso
->nso_flags
);
449 lck_mtx_unlock(&nso
->nso_lock
);
453 * Create/initialize an nfs_socket structure.
/*
 * nfs_socket_create: allocate and set up a struct nfs_socket for one
 * candidate address. (The line carrying the function name is missing
 * from this listing. The name is taken from the call site in
 * nfs_connect_search_loop.)
 *
 * nmp:  the mount (marked unused, referenced only in debug output).
 * nsop: NOTE(review): presumably receives the new socket, but the store
 *       is not visible in this listing.
 * The remaining parameters (sa, sotype, port, protocol, vers, resvport,
 * judging by their uses in the body) are on lines that are missing here.
 *
 * Visible steps:
 *  - Allocate the nfs_socket and a copy buffer for the address, both
 *    zero-filled (M_WAITOK|M_ZERO).
 *  - Initialize nso_lock, record the socket type, set up RPC record state
 *    for stream sockets, and stamp the creation time.
 *  - Copy sa and overwrite its port with the requested one in network
 *    byte order. Record protocol and version.
 *  - Create the kernel socket with sock_socket().
 *  - If a reserved port is wanted on an IPv4 or IPv6 socket, select the
 *    low port range and bind to the wildcard address with port 0 so a
 *    bind failure is noticed now.
 *  - On error the partially built socket is destroyed.
 *
 * NOTE(review): many lines are missing from this listing (braces, else
 * branches, declarations, the out-of-memory handling and the return).
 * Confirm against the full source.
 */
457 __unused
struct nfsmount
*nmp
,
464 struct nfs_socket
**nsop
)
466 struct nfs_socket
*nso
;
469 #ifdef NFS_SOCKET_DEBUGGING
470 char naddr
[MAX_IPv6_STR_LEN
];
473 if (sa
->sa_family
== AF_INET
)
474 sinaddr
= &((struct sockaddr_in
*)sa
)->sin_addr
;
476 sinaddr
= &((struct sockaddr_in6
*)sa
)->sin6_addr
;
477 if (inet_ntop(sa
->sa_family
, sinaddr
, naddr
, sizeof(naddr
)) != naddr
)
478 strlcpy(naddr
, "<unknown>", sizeof(naddr
));
483 /* Create the socket. */
484 MALLOC(nso
, struct nfs_socket
*, sizeof(struct nfs_socket
), M_TEMP
, M_WAITOK
|M_ZERO
);
486 MALLOC(nso
->nso_saddr
, struct sockaddr
*, sa
->sa_len
, M_SONAME
, M_WAITOK
|M_ZERO
);
487 if (!nso
|| !nso
->nso_saddr
) {
492 lck_mtx_init(&nso
->nso_lock
, nfs_request_grp
, LCK_ATTR_NULL
);
493 nso
->nso_sotype
= sotype
;
494 if (nso
->nso_sotype
== SOCK_STREAM
)
495 nfs_rpc_record_state_init(&nso
->nso_rrs
);
497 nso
->nso_timestamp
= now
.tv_sec
;
/* keep a private copy of the address, then override its port */
498 bcopy(sa
, nso
->nso_saddr
, sa
->sa_len
);
499 if (sa
->sa_family
== AF_INET
)
500 ((struct sockaddr_in
*)nso
->nso_saddr
)->sin_port
= htons(port
);
501 else if (sa
->sa_family
== AF_INET6
)
502 ((struct sockaddr_in6
*)nso
->nso_saddr
)->sin6_port
= htons(port
);
503 nso
->nso_protocol
= protocol
;
504 nso
->nso_version
= vers
;
506 error
= sock_socket(sa
->sa_family
, nso
->nso_sotype
, 0, NULL
, NULL
, &nso
->nso_so
);
508 /* Some servers require that the client port be a reserved port number. */
509 if (!error
&& resvport
&& ((sa
->sa_family
== AF_INET
) || (sa
->sa_family
== AF_INET6
))) {
510 struct sockaddr_storage ss
;
511 int level
= (sa
->sa_family
== AF_INET
) ? IPPROTO_IP
: IPPROTO_IPV6
;
512 int optname
= (sa
->sa_family
== AF_INET
) ? IP_PORTRANGE
: IPV6_PORTRANGE
;
513 int portrange
= IP_PORTRANGE_LOW
;
515 error
= sock_setsockopt(nso
->nso_so
, level
, optname
, &portrange
, sizeof(portrange
));
516 if (!error
) { /* bind now to check for failure */
/* wildcard local address with port 0, same family and length as sa */
517 ss
.ss_len
= sa
->sa_len
;
518 ss
.ss_family
= sa
->sa_family
;
519 if (ss
.ss_family
== AF_INET
) {
520 ((struct sockaddr_in
*)&ss
)->sin_addr
.s_addr
= INADDR_ANY
;
521 ((struct sockaddr_in
*)&ss
)->sin_port
= htons(0);
522 } else if (ss
.ss_family
== AF_INET6
) {
523 ((struct sockaddr_in6
*)&ss
)->sin6_addr
= in6addr_any
;
524 ((struct sockaddr_in6
*)&ss
)->sin6_port
= htons(0);
529 error
= sock_bind(nso
->nso_so
, (struct sockaddr
*)&ss
);
534 NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
535 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, error
, nso
, naddr
, sotype
,
536 resvport
? "r" : "", port
, protocol
, vers
));
537 nfs_socket_destroy(nso
);
539 NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
540 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, naddr
,
541 sotype
, resvport
? "r" : "", port
, protocol
, vers
));
548 * Destroy an nfs_socket structure.
/*
 * nfs_socket_destroy: tear down a struct nfs_socket.
 *
 * nso: the socket structure to destroy.
 *
 * Marks the socket NSO_DISCONNECTING under nso_lock and, if an upcall is
 * in progress (NSO_UPCALL), sleeps on &nso->nso_flags with a 4 second
 * timeout so the upcall can finish. It then shuts down and closes the
 * kernel socket, cleans up the RPC record state for stream sockets,
 * destroys the lock and frees nso_saddr and nso_saddr2.
 *
 * NOTE(review): lines are missing from this listing (for example any
 * guards around the FREE() calls and the release of nso itself are not
 * visible). Confirm against the full source.
 */
551 nfs_socket_destroy(struct nfs_socket
*nso
)
/* upper bound on how long to wait for a running upcall: 4 s, 0 ns */
553 struct timespec ts
= { 4, 0 };
555 lck_mtx_lock(&nso
->nso_lock
);
556 nso
->nso_flags
|= NSO_DISCONNECTING
;
557 if (nso
->nso_flags
& NSO_UPCALL
) /* give upcall a chance to complete */
558 msleep(&nso
->nso_flags
, &nso
->nso_lock
, PZERO
-1, "nfswaitupcall", &ts
);
559 lck_mtx_unlock(&nso
->nso_lock
);
/* stop traffic in both directions before closing */
560 sock_shutdown(nso
->nso_so
, SHUT_RDWR
);
561 sock_close(nso
->nso_so
);
562 if (nso
->nso_sotype
== SOCK_STREAM
)
563 nfs_rpc_record_state_cleanup(&nso
->nso_rrs
);
564 lck_mtx_destroy(&nso
->nso_lock
, nfs_request_grp
);
566 FREE(nso
->nso_saddr
, M_SONAME
);
568 FREE(nso
->nso_saddr2
, M_SONAME
);
569 NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso
));
574 * Set common socket options on an nfs_socket.
/*
 * nfs_socket_options: apply the common socket options to nso->nso_so.
 *
 * nmp: the mount whose flags (SOFT, INTR) and squish state pick values.
 * nso: the nfs_socket whose kernel socket is configured.
 *
 * Visible settings:
 *  - send and receive timeouts of 5 seconds for soft or squishable
 *    mounts and 60 seconds otherwise,
 *  - SO_KEEPALIVE on stream sockets, plus TCP_NODELAY when the socket
 *    protocol is TCP,
 *  - NFS_UDPSOCKBUF send and receive buffers on datagram sockets,
 *  - SO_NOADDRERR and SO_UPCALLCLOSEWAIT,
 *  - sock_nointerrupt() when the mount is not INTR.
 * The return values of the sock_setsockopt() calls are not checked.
 *
 * NOTE(review): the declarations of on and proto and any setup of the
 * tv_usec field are not visible in this listing.
 */
577 nfs_socket_options(struct nfsmount
*nmp
, struct nfs_socket
*nso
)
580 * Set socket send/receive timeouts
581 * - Receive timeout shouldn't matter because most receives are performed
582 * in the socket upcall non-blocking.
583 * - Send timeout should allow us to react to a blocked socket.
584 * Soft mounts will want to abort sooner.
586 struct timeval timeo
;
590 timeo
.tv_sec
= (NMFLAG(nmp
, SOFT
) || nfs_can_squish(nmp
)) ? 5 : 60;
591 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
592 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
593 if (nso
->nso_sotype
== SOCK_STREAM
) {
594 /* Assume that SOCK_STREAM always requires a connection */
595 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
));
596 /* set nodelay for TCP */
597 sock_gettype(nso
->nso_so
, NULL
, NULL
, &proto
);
598 if (proto
== IPPROTO_TCP
)
599 sock_setsockopt(nso
->nso_so
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
601 if (nso
->nso_sotype
== SOCK_DGRAM
) { /* set socket buffer sizes for UDP */
602 int reserve
= NFS_UDPSOCKBUF
;
603 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_SNDBUF
, &reserve
, sizeof(reserve
));
604 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_RCVBUF
, &reserve
, sizeof(reserve
));
606 /* set SO_NOADDRERR to detect network changes ASAP */
607 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_NOADDRERR
, &on
, sizeof(on
));
608 /* just playin' it safe with upcalls */
609 sock_setsockopt(nso
->nso_so
, SOL_SOCKET
, SO_UPCALLCLOSEWAIT
, &on
, sizeof(on
));
610 /* socket should be interruptible if the mount is */
611 if (!NMFLAG(nmp
, INTR
))
612 sock_nointerrupt(nso
->nso_so
, 1);
616 * Release resources held in an nfs_socket_search.
/*
 * nfs_socket_search_cleanup: destroy the sockets held by a search.
 *
 * nss: the search state to empty.
 *
 * Unlinks every socket on nss_socklist and destroys it (the SAFE list
 * iterator is used because entries are removed while walking), then
 * destroys nss_sock and resets it to NULL.
 *
 * NOTE(review): lines are missing from this listing (for example any
 * NULL check before nss_sock is destroyed and any socket-count update).
 * Confirm against the full source.
 */
619 nfs_socket_search_cleanup(struct nfs_socket_search
*nss
)
621 struct nfs_socket
*nso
, *nsonext
;
/* candidates still on the list */
623 TAILQ_FOREACH_SAFE(nso
, &nss
->nss_socklist
, nso_link
, nsonext
) {
624 TAILQ_REMOVE(&nss
->nss_socklist
, nso
, nso_link
);
626 nfs_socket_destroy(nso
);
/* the socket the search had settled on */
629 nfs_socket_destroy(nss
->nss_sock
);
630 nss
->nss_sock
= NULL
;
635 * Prefer returning certain errors over others.
636 * This function returns a ranking of the given error.
639 nfs_connect_error_class(int error
)
674 * Make sure a socket search returns the best error.
677 nfs_socket_search_update_error(struct nfs_socket_search
*nss
, int error
)
679 if (nfs_connect_error_class(error
) >= nfs_connect_error_class(nss
->nss_error
))
680 nss
->nss_error
= error
;
684 * Continue the socket search until we have something to report.
/*
 * nfs_connect_search_loop: drive a socket search forward until there is
 * something to report.
 *
 * nmp: the mount being connected.
 * nss: the search state (candidate list, next and start location,
 *      timeouts, best error so far, and the chosen socket nss_sock).
 *
 * Visible phases:
 *  1. Start another candidate while none has been started yet, none are
 *     active, or fewer than 4 are active and at least 2 seconds have
 *     passed since the last start. The next location is converted to a
 *     sockaddr, filtered by the mount socket family, and handed to
 *     nfs_socket_create(). The connect upcall is installed and the
 *     candidate is appended to nss_socklist.
 *  2. Walk the candidates: start or check the non-blocking connect,
 *     apply the socket options once connected, send a NULL RPC ping, and
 *     take a verified socket off the list.
 *  3. Reap candidates: any candidate older than nss_timeo seconds is
 *     marked dead with ETIMEDOUT, and dead candidates feed their error
 *     into nfs_socket_search_update_error() and are destroyed.
 *  4. If no socket has been chosen and candidates or locations remain,
 *     log a one-time notice after 30 seconds when verbose, check for
 *     unmount and for signals, and sleep with tsleep() for hz ticks.
 *
 * NOTE(review): this listing is missing many lines (braces, continue /
 * break / goto statements, declarations, the loop label and the return).
 * The phase boundaries above follow the visible comments. Confirm the
 * exact control flow against the full source.
 */
687 nfs_connect_search_loop(struct nfsmount
*nmp
, struct nfs_socket_search
*nss
)
689 struct nfs_socket
*nso
, *nsonext
;
691 struct nfs_fs_location
*fsl
;
692 struct nfs_fs_server
*fss
;
693 struct sockaddr_storage ss
;
695 int error
, nomore
= 0;
699 NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, now
.tv_sec
));
701 /* Time to start another socket? */
702 while ((nss
->nss_last
< 0) || (nss
->nss_sockcnt
== 0) ||
703 ((nss
->nss_sockcnt
< 4) && (now
.tv_sec
>= (nss
->nss_last
+ 2)))) {
704 if (nmp
->nm_sockflags
& NMSOCK_UNMOUNT
)
706 /* Find the next address to try... */
707 /* Have we run out of locations? */
708 if (!nomore
&& (nss
->nss_last
!= -1) && !nfs_location_index_cmp(&nss
->nss_nextloc
, &nss
->nss_startloc
))
711 if (nss
->nss_last
< 0)
712 nss
->nss_last
= now
.tv_sec
;
715 /* Can we convert the address to a sockaddr? */
716 fsl
= nmp
->nm_locations
.nl_locations
[nss
->nss_nextloc
.nli_loc
];
717 fss
= fsl
->nl_servers
[nss
->nss_nextloc
.nli_serv
];
718 addrstr
= fss
->ns_addresses
[nss
->nss_nextloc
.nli_addr
];
719 if (!nfs_uaddr2sockaddr(addrstr
, (struct sockaddr
*)&ss
)) {
720 nfs_location_next(&nmp
->nm_locations
, &nss
->nss_nextloc
);
724 /* Check that socket family is acceptable. */
725 if (nmp
->nm_sofamily
&& (ss
.ss_family
!= nmp
->nm_sofamily
)) {
726 nfs_location_next(&nmp
->nm_locations
, &nss
->nss_nextloc
);
731 /* Create the socket. */
/* a reserved port is requested only for the NFS program on RESVPORT mounts */
732 error
= nfs_socket_create(nmp
, (struct sockaddr
*)&ss
, nss
->nss_sotype
,
733 nss
->nss_port
, nss
->nss_protocol
, nss
->nss_version
,
734 ((nss
->nss_protocol
== NFS_PROG
) && NMFLAG(nmp
, RESVPORT
)), &nso
);
738 nso
->nso_location
= nss
->nss_nextloc
;
740 error
= sock_setupcall(nso
->nso_so
, nfs_connect_upcall
, nso
);
742 lck_mtx_lock(&nso
->nso_lock
);
743 nso
->nso_error
= error
;
744 nso
->nso_flags
|= NSO_DEAD
;
745 lck_mtx_unlock(&nso
->nso_lock
);
748 TAILQ_INSERT_TAIL(&nss
->nss_socklist
, nso
, nso_link
);
750 nfs_location_next(&nmp
->nm_locations
, &nss
->nss_nextloc
);
752 nss
->nss_last
= now
.tv_sec
;
755 /* check each active socket and try to push it along */
756 TAILQ_FOREACH(nso
, &nss
->nss_socklist
, nso_link
) {
757 lck_mtx_lock(&nso
->nso_lock
);
758 if (!(nso
->nso_flags
& NSO_CONNECTED
)) {
759 if ((nso
->nso_sotype
!= SOCK_STREAM
) && NMFLAG(nmp
, NOCONNECT
)) {
760 /* no connection needed, just say it's already connected */
761 nso
->nso_flags
|= NSO_CONNECTED
;
762 NFS_SOCK_DBG(("nfs connect %s UDP socket %p noconnect\n",
763 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
764 } else if (!(nso
->nso_flags
& NSO_CONNECTING
)) {
765 /* initiate the connection */
766 nso
->nso_flags
|= NSO_CONNECTING
;
767 lck_mtx_unlock(&nso
->nso_lock
);
768 NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n",
769 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
770 error
= sock_connect(nso
->nso_so
, nso
->nso_saddr
, MSG_DONTWAIT
);
771 lck_mtx_lock(&nso
->nso_lock
);
/* EINPROGRESS is the expected result of a non-blocking connect */
772 if (error
&& (error
!= EINPROGRESS
)) {
773 nso
->nso_error
= error
;
774 nso
->nso_flags
|= NSO_DEAD
;
775 lck_mtx_unlock(&nso
->nso_lock
);
779 if (nso
->nso_flags
& NSO_CONNECTING
) {
780 /* check the connection */
781 if (sock_isconnected(nso
->nso_so
)) {
782 NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n",
783 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
784 nso
->nso_flags
&= ~NSO_CONNECTING
;
785 nso
->nso_flags
|= NSO_CONNECTED
;
787 int optlen
= sizeof(error
);
789 sock_getsockopt(nso
->nso_so
, SOL_SOCKET
, SO_ERROR
, &error
, &optlen
);
790 if (error
) { /* we got an error on the socket */
791 NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n",
792 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, error
));
793 if (nss
->nss_flags
& NSS_VERBOSE
)
794 log(LOG_INFO
, "nfs_connect: socket error %d for %s\n",
795 error
, vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
);
796 nso
->nso_error
= error
;
797 nso
->nso_flags
|= NSO_DEAD
;
798 lck_mtx_unlock(&nso
->nso_lock
);
803 if (nso
->nso_flags
& NSO_CONNECTED
)
804 nfs_socket_options(nmp
, nso
);
806 if (!(nso
->nso_flags
& NSO_CONNECTED
)) {
807 lck_mtx_unlock(&nso
->nso_lock
);
/* ping if the socket is neither pinging nor verified, or resend a datagram ping after 2 seconds */
810 if (!(nso
->nso_flags
& (NSO_PINGING
|NSO_VERIFIED
)) ||
811 ((nso
->nso_sotype
== SOCK_DGRAM
) && (now
.tv_sec
>= nso
->nso_reqtimestamp
+2))) {
812 /* initiate a NULL RPC request */
813 uint64_t xid
= nso
->nso_pingxid
;
814 mbuf_t m
, mreq
= NULL
;
816 size_t reqlen
, sentlen
;
819 if (!(vers
= nso
->nso_version
)) {
820 if (nso
->nso_protocol
== PMAPPROG
)
821 vers
= (nso
->nso_saddr
->sa_family
== AF_INET
) ? PMAPVERS
: RPCBVERS4
;
822 else if (nso
->nso_protocol
== NFS_PROG
)
825 lck_mtx_unlock(&nso
->nso_lock
);
826 error
= nfsm_rpchead2(nmp
, nso
->nso_sotype
, nso
->nso_protocol
, vers
, 0, RPCAUTH_SYS
,
827 vfs_context_ucred(vfs_context_kernel()), NULL
, NULL
, &xid
, &mreq
);
828 lck_mtx_lock(&nso
->nso_lock
);
830 nso
->nso_flags
|= NSO_PINGING
;
831 nso
->nso_pingxid
= R_XID32(xid
);
832 nso
->nso_reqtimestamp
= now
.tv_sec
;
833 bzero(&msg
, sizeof(msg
));
/* an unconnected datagram socket needs an explicit destination */
834 if ((nso
->nso_sotype
!= SOCK_STREAM
) && !sock_isconnected(nso
->nso_so
)) {
835 msg
.msg_name
= nso
->nso_saddr
;
836 msg
.msg_namelen
= nso
->nso_saddr
->sa_len
;
/* total request length, used to detect a short send */
838 for (reqlen
=0, m
=mreq
; m
; m
= mbuf_next(m
))
839 reqlen
+= mbuf_len(m
);
840 lck_mtx_unlock(&nso
->nso_lock
);
841 error
= sock_sendmbuf(nso
->nso_so
, &msg
, mreq
, 0, &sentlen
);
842 NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n",
843 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, error
));
844 lck_mtx_lock(&nso
->nso_lock
);
845 if (!error
&& (sentlen
!= reqlen
))
849 nso
->nso_error
= error
;
850 nso
->nso_flags
|= NSO_DEAD
;
851 lck_mtx_unlock(&nso
->nso_lock
);
855 if (nso
->nso_flags
& NSO_VERIFIED
) {
856 /* WOOHOO!! This socket looks good! */
857 NFS_SOCK_DBG(("nfs connect %s socket %p verified\n",
858 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
859 if (!nso
->nso_version
) {
860 /* If the version isn't set, the default must have worked. */
861 if (nso
->nso_protocol
== PMAPPROG
)
862 nso
->nso_version
= (nso
->nso_saddr
->sa_family
== AF_INET
) ? PMAPVERS
: RPCBVERS4
;
863 if (nso
->nso_protocol
== NFS_PROG
)
864 nso
->nso_version
= NFS_VER3
;
866 lck_mtx_unlock(&nso
->nso_lock
);
867 TAILQ_REMOVE(&nss
->nss_socklist
, nso
, nso_link
);
872 lck_mtx_unlock(&nso
->nso_lock
);
/* reap pass: time out old candidates and destroy the dead ones */
875 TAILQ_FOREACH_SAFE(nso
, &nss
->nss_socklist
, nso_link
, nsonext
) {
876 lck_mtx_lock(&nso
->nso_lock
);
877 if (now
.tv_sec
>= (nso
->nso_timestamp
+ nss
->nss_timeo
)) {
879 NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n",
880 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
881 nso
->nso_error
= ETIMEDOUT
;
882 nso
->nso_flags
|= NSO_DEAD
;
884 if (!(nso
->nso_flags
& NSO_DEAD
)) {
885 lck_mtx_unlock(&nso
->nso_lock
);
888 lck_mtx_unlock(&nso
->nso_lock
);
889 NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n",
890 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, nso
->nso_error
));
891 nfs_socket_search_update_error(nss
, nso
->nso_error
);
892 TAILQ_REMOVE(&nss
->nss_socklist
, nso
, nso_link
);
894 nfs_socket_destroy(nso
);
900 * Keep looping if we haven't found a socket yet and we have more
901 * sockets to (continue to) try.
904 if (!nss
->nss_sock
&& (!TAILQ_EMPTY(&nss
->nss_socklist
) || !nomore
)) {
905 /* log a warning if connect is taking a while */
906 if (((now
.tv_sec
- nss
->nss_timestamp
) >= 30) && ((nss
->nss_flags
& (NSS_VERBOSE
|NSS_WARNED
)) == NSS_VERBOSE
)) {
907 log(LOG_INFO
, "nfs_connect: socket connect taking a while for %s\n",
908 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
);
909 nss
->nss_flags
|= NSS_WARNED
;
911 if (nmp
->nm_sockflags
& NMSOCK_UNMOUNT
)
913 if ((error
= nfs_sigintr(nmp
, NULL
, current_thread(), 0)))
915 if (nss
->nss_last
>= 0)
916 tsleep(nss
, PSOCK
, "nfs_connect_search_wait", hz
);
920 NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, error
));
925 * Initialize a new NFS connection.
927 * Search for a location to connect a socket to and initialize the connection.
929 * An NFS mount may have multiple locations/servers/addresses available.
930 * We attempt to connect to each one asynchronously and will start
931 * several sockets in parallel if other locations are slow to answer.
932 * We'll use the first NFS socket we can successfully set up.
934 * The search may involve contacting the portmapper service first.
936 * A mount's initial connection may require negotiating some parameters such
937 * as socket type and NFS version.
940 nfs_connect(struct nfsmount
*nmp
, int verbose
, int timeo
)
942 struct nfs_socket_search nss
;
943 struct nfs_socket
*nso
, *nsonfs
;
944 struct sockaddr_storage ss
;
945 struct sockaddr
*saddr
, *oldsaddr
;
947 struct timeval now
, start
;
948 int error
, savederror
, nfsvers
;
949 uint8_t sotype
= nmp
->nm_sotype
? nmp
->nm_sotype
: SOCK_STREAM
;
950 fhandle_t
*fh
= NULL
;
954 /* paranoia... check that we have at least one address in the locations */
956 for (loc
=0; loc
< nmp
->nm_locations
.nl_numlocs
; loc
++) {
957 for (serv
=0; serv
< nmp
->nm_locations
.nl_locations
[loc
]->nl_servcount
; serv
++) {
958 if (nmp
->nm_locations
.nl_locations
[loc
]->nl_servers
[serv
]->ns_addrcount
)
960 NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n",
961 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
,
962 nmp
->nm_locations
.nl_locations
[loc
]->nl_servers
[serv
]->ns_name
));
964 if (serv
< nmp
->nm_locations
.nl_locations
[loc
]->nl_servcount
)
967 if (loc
>= nmp
->nm_locations
.nl_numlocs
) {
968 NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n",
969 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
));
973 lck_mtx_lock(&nmp
->nm_lock
);
974 nmp
->nm_sockflags
|= NMSOCK_CONNECTING
;
976 lck_mtx_unlock(&nmp
->nm_lock
);
978 savederror
= error
= 0;
981 /* initialize socket search state */
982 bzero(&nss
, sizeof(nss
));
983 nss
.nss_error
= savederror
;
984 TAILQ_INIT(&nss
.nss_socklist
);
985 nss
.nss_sotype
= sotype
;
986 nss
.nss_startloc
= nmp
->nm_locations
.nl_current
;
987 nss
.nss_timestamp
= start
.tv_sec
;
988 nss
.nss_timeo
= timeo
;
990 nss
.nss_flags
|= NSS_VERBOSE
;
992 /* First time connecting, we may need to negotiate some things */
993 if (!(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
)) {
995 /* No NFS version specified... */
996 if (!nmp
->nm_nfsport
|| (!NM_OMATTR_GIVEN(nmp
, FH
) && !nmp
->nm_mountport
)) {
997 /* ...connect to portmapper first if we (may) need any ports. */
998 nss
.nss_port
= PMAPPORT
;
999 nss
.nss_protocol
= PMAPPROG
;
1000 nss
.nss_version
= 0;
1002 /* ...connect to NFS port first. */
1003 nss
.nss_port
= nmp
->nm_nfsport
;
1004 nss
.nss_protocol
= NFS_PROG
;
1005 nss
.nss_version
= 0;
1007 } else if (nmp
->nm_vers
>= NFS_VER4
) {
1008 /* For NFSv4, we use the given (or default) port. */
1009 nss
.nss_port
= nmp
->nm_nfsport
? nmp
->nm_nfsport
: NFS_PORT
;
1010 nss
.nss_protocol
= NFS_PROG
;
1011 nss
.nss_version
= 4;
1013 /* For NFSv3/v2... */
1014 if (!nmp
->nm_nfsport
|| (!NM_OMATTR_GIVEN(nmp
, FH
) && !nmp
->nm_mountport
)) {
1015 /* ...connect to portmapper first if we need any ports. */
1016 nss
.nss_port
= PMAPPORT
;
1017 nss
.nss_protocol
= PMAPPROG
;
1018 nss
.nss_version
= 0;
1020 /* ...connect to NFS port first. */
1021 nss
.nss_port
= nmp
->nm_nfsport
;
1022 nss
.nss_protocol
= NFS_PROG
;
1023 nss
.nss_version
= nmp
->nm_vers
;
1026 NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n",
1027 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nss
.nss_sotype
, nss
.nss_port
,
1028 nss
.nss_protocol
, nss
.nss_version
));
1030 /* we've connected before, just connect to NFS port */
1031 if (!nmp
->nm_nfsport
) {
1032 /* need to ask portmapper which port that would be */
1033 nss
.nss_port
= PMAPPORT
;
1034 nss
.nss_protocol
= PMAPPROG
;
1035 nss
.nss_version
= 0;
1037 nss
.nss_port
= nmp
->nm_nfsport
;
1038 nss
.nss_protocol
= NFS_PROG
;
1039 nss
.nss_version
= nmp
->nm_vers
;
1041 NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n",
1042 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nss
.nss_sotype
, nss
.nss_port
,
1043 nss
.nss_protocol
, nss
.nss_version
));
1046 /* Set next location to first valid location. */
1047 /* If start location is invalid, find next location. */
1048 nss
.nss_nextloc
= nss
.nss_startloc
;
1049 if ((nss
.nss_nextloc
.nli_serv
>= nmp
->nm_locations
.nl_locations
[nss
.nss_nextloc
.nli_loc
]->nl_servcount
) ||
1050 (nss
.nss_nextloc
.nli_addr
>= nmp
->nm_locations
.nl_locations
[nss
.nss_nextloc
.nli_loc
]->nl_servers
[nss
.nss_nextloc
.nli_serv
]->ns_addrcount
)) {
1051 nfs_location_next(&nmp
->nm_locations
, &nss
.nss_nextloc
);
1052 if (!nfs_location_index_cmp(&nss
.nss_nextloc
, &nss
.nss_startloc
)) {
1053 NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n",
1054 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
));
1062 error
= nfs_connect_search_loop(nmp
, &nss
);
1063 if (error
|| !nss
.nss_sock
) {
1065 nfs_socket_search_cleanup(&nss
);
1066 if (!error
&& (nss
.nss_sotype
== SOCK_STREAM
) && !nmp
->nm_sotype
&& (nmp
->nm_vers
< NFS_VER4
)) {
1068 sotype
= SOCK_DGRAM
;
1069 savederror
= nss
.nss_error
;
1070 NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n",
1071 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, error
, nss
.nss_error
));
1075 error
= nss
.nss_error
? nss
.nss_error
: ETIMEDOUT
;
1076 lck_mtx_lock(&nmp
->nm_lock
);
1077 nmp
->nm_sockflags
&= ~NMSOCK_CONNECTING
;
1079 lck_mtx_unlock(&nmp
->nm_lock
);
1080 if (nss
.nss_flags
& NSS_WARNED
)
1081 log(LOG_INFO
, "nfs_connect: socket connect aborted for %s\n",
1082 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
);
1086 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
1087 NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n",
1088 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, error
));
1092 /* try to use nss_sock */
1094 nss
.nss_sock
= NULL
;
1096 /* We may be speaking to portmap first... to determine port(s). */
1097 if (nso
->nso_saddr
->sa_family
== AF_INET
)
1098 port
= ntohs(((struct sockaddr_in
*)nso
->nso_saddr
)->sin_port
);
1100 port
= ntohs(((struct sockaddr_in6
*)nso
->nso_saddr
)->sin6_port
);
1101 if (port
== PMAPPORT
) {
1102 /* Use this portmapper port to get the port #s we need. */
1103 NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n",
1104 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
1106 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */
1107 sock_setupcall(nso
->nso_so
, NULL
, NULL
);
1109 /* Set up socket address and port for NFS socket. */
1110 bcopy(nso
->nso_saddr
, &ss
, nso
->nso_saddr
->sa_len
);
1112 /* If NFS version not set, try NFSv3 then NFSv2. */
1113 nfsvers
= nmp
->nm_vers
? nmp
->nm_vers
: NFS_VER3
;
1115 if (!(port
= nmp
->nm_nfsport
)) {
1116 if (ss
.ss_family
== AF_INET
)
1117 ((struct sockaddr_in
*)&ss
)->sin_port
= htons(0);
1118 else if (ss
.ss_family
== AF_INET6
)
1119 ((struct sockaddr_in6
*)&ss
)->sin6_port
= htons(0);
1120 error
= nfs_portmap_lookup(nmp
, vfs_context_current(), (struct sockaddr
*)&ss
,
1121 nso
->nso_so
, NFS_PROG
, nfsvers
,
1122 (nso
->nso_sotype
== SOCK_DGRAM
) ? IPPROTO_UDP
: IPPROTO_TCP
, timeo
);
1124 if (ss
.ss_family
== AF_INET
)
1125 port
= ntohs(((struct sockaddr_in
*)&ss
)->sin_port
);
1126 else if (ss
.ss_family
== AF_INET6
)
1127 port
= ntohs(((struct sockaddr_in6
*)&ss
)->sin6_port
);
1129 error
= EPROGUNAVAIL
;
1131 if (error
&& !nmp
->nm_vers
) {
1133 error
= nfs_portmap_lookup(nmp
, vfs_context_current(), (struct sockaddr
*)&ss
,
1134 nso
->nso_so
, NFS_PROG
, nfsvers
,
1135 (nso
->nso_sotype
== SOCK_DGRAM
) ? IPPROTO_UDP
: IPPROTO_TCP
, timeo
);
1137 if (ss
.ss_family
== AF_INET
)
1138 port
= ntohs(((struct sockaddr_in
*)&ss
)->sin_port
);
1139 else if (ss
.ss_family
== AF_INET6
)
1140 port
= ntohs(((struct sockaddr_in6
*)&ss
)->sin6_port
);
1142 error
= EPROGUNAVAIL
;
1146 nfs_socket_search_update_error(&nss
, error
);
1147 nfs_socket_destroy(nso
);
1151 /* Create NFS protocol socket and add it to the list of sockets. */
1152 error
= nfs_socket_create(nmp
, (struct sockaddr
*)&ss
, nso
->nso_sotype
, port
,
1153 NFS_PROG
, nfsvers
, NMFLAG(nmp
, RESVPORT
), &nsonfs
);
1155 nfs_socket_search_update_error(&nss
, error
);
1156 nfs_socket_destroy(nso
);
1159 nsonfs
->nso_location
= nso
->nso_location
;
1160 nsonfs
->nso_wake
= &nss
;
1161 error
= sock_setupcall(nsonfs
->nso_so
, nfs_connect_upcall
, nsonfs
);
1163 nfs_socket_search_update_error(&nss
, error
);
1164 nfs_socket_destroy(nsonfs
);
1165 nfs_socket_destroy(nso
);
1168 TAILQ_INSERT_TAIL(&nss
.nss_socklist
, nsonfs
, nso_link
);
1170 if ((nfsvers
< NFS_VER4
) && !(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
) && !NM_OMATTR_GIVEN(nmp
, FH
)) {
1171 /* Set up socket address and port for MOUNT socket. */
1173 bcopy(nso
->nso_saddr
, &ss
, nso
->nso_saddr
->sa_len
);
1174 port
= nmp
->nm_mountport
;
1175 if (ss
.ss_family
== AF_INET
)
1176 ((struct sockaddr_in
*)&ss
)->sin_port
= htons(port
);
1177 else if (ss
.ss_family
== AF_INET6
)
1178 ((struct sockaddr_in6
*)&ss
)->sin6_port
= htons(port
);
1180 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1181 /* If NFS version is unknown, optimistically choose for NFSv3. */
1182 int mntvers
= (nfsvers
== NFS_VER2
) ? RPCMNT_VER1
: RPCMNT_VER3
;
1183 int mntproto
= (NM_OMFLAG(nmp
, MNTUDP
) || (nso
->nso_sotype
== SOCK_DGRAM
)) ? IPPROTO_UDP
: IPPROTO_TCP
;
1184 error
= nfs_portmap_lookup(nmp
, vfs_context_current(), (struct sockaddr
*)&ss
,
1185 nso
->nso_so
, RPCPROG_MNT
, mntvers
, mntproto
, timeo
);
1188 if (ss
.ss_family
== AF_INET
)
1189 port
= ntohs(((struct sockaddr_in
*)&ss
)->sin_port
);
1190 else if (ss
.ss_family
== AF_INET6
)
1191 port
= ntohs(((struct sockaddr_in6
*)&ss
)->sin6_port
);
1193 error
= EPROGUNAVAIL
;
1195 /* create sockaddr for MOUNT */
1197 MALLOC(nsonfs
->nso_saddr2
, struct sockaddr
*, ss
.ss_len
, M_SONAME
, M_WAITOK
|M_ZERO
);
1198 if (!error
&& !nsonfs
->nso_saddr2
)
1201 bcopy(&ss
, nsonfs
->nso_saddr2
, ss
.ss_len
);
1203 lck_mtx_lock(&nsonfs
->nso_lock
);
1204 nsonfs
->nso_error
= error
;
1205 nsonfs
->nso_flags
|= NSO_DEAD
;
1206 lck_mtx_unlock(&nsonfs
->nso_lock
);
1209 nfs_socket_destroy(nso
);
1213 /* nso is an NFS socket */
1214 NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
));
1216 /* If NFS version wasn't specified, it was determined during the connect. */
1217 nfsvers
= nmp
->nm_vers
? nmp
->nm_vers
: (int)nso
->nso_version
;
1219 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
1220 if ((nfsvers
< NFS_VER4
) && !(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
) && !NM_OMATTR_GIVEN(nmp
, FH
)) {
1222 saddr
= nso
->nso_saddr2
;
1224 /* Need sockaddr for MOUNT port */
1225 bcopy(nso
->nso_saddr
, &ss
, nso
->nso_saddr
->sa_len
);
1226 port
= nmp
->nm_mountport
;
1227 if (ss
.ss_family
== AF_INET
)
1228 ((struct sockaddr_in
*)&ss
)->sin_port
= htons(port
);
1229 else if (ss
.ss_family
== AF_INET6
)
1230 ((struct sockaddr_in6
*)&ss
)->sin6_port
= htons(port
);
1232 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1233 int mntvers
= (nfsvers
== NFS_VER2
) ? RPCMNT_VER1
: RPCMNT_VER3
;
1234 int mntproto
= (NM_OMFLAG(nmp
, MNTUDP
) || (nso
->nso_sotype
== SOCK_DGRAM
)) ? IPPROTO_UDP
: IPPROTO_TCP
;
1235 error
= nfs_portmap_lookup(nmp
, vfs_context_current(), (struct sockaddr
*)&ss
,
1236 NULL
, RPCPROG_MNT
, mntvers
, mntproto
, timeo
);
1237 if (ss
.ss_family
== AF_INET
)
1238 port
= ntohs(((struct sockaddr_in
*)&ss
)->sin_port
);
1239 else if (ss
.ss_family
== AF_INET6
)
1240 port
= ntohs(((struct sockaddr_in6
*)&ss
)->sin6_port
);
1244 saddr
= (struct sockaddr
*)&ss
;
1246 error
= EPROGUNAVAIL
;
1250 MALLOC(fh
, fhandle_t
*, sizeof(fhandle_t
), M_TEMP
, M_WAITOK
|M_ZERO
);
1252 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
1253 if (!saddr
|| !fh
|| !path
) {
1259 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
1262 nfs_socket_search_update_error(&nss
, error
);
1263 nfs_socket_destroy(nso
);
1266 nfs_location_mntfromname(&nmp
->nm_locations
, nso
->nso_location
, path
, MAXPATHLEN
, 1);
1267 error
= nfs3_mount_rpc(nmp
, saddr
, nso
->nso_sotype
, nfsvers
,
1268 path
, vfs_context_current(), timeo
, fh
, &nmp
->nm_servsec
);
1269 NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n",
1270 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, error
));
1272 /* Make sure we can agree on a security flavor. */
1273 int o
, s
; /* indices into mount option and server security flavor lists */
1276 if ((nfsvers
== NFS_VER3
) && !nmp
->nm_servsec
.count
) {
1277 /* Some servers return an empty list to indicate RPCAUTH_SYS? */
1278 nmp
->nm_servsec
.count
= 1;
1279 nmp
->nm_servsec
.flavors
[0] = RPCAUTH_SYS
;
1281 if (nmp
->nm_sec
.count
) {
1282 /* Choose the first flavor in our list that the server supports. */
1283 if (!nmp
->nm_servsec
.count
) {
1284 /* we don't know what the server supports, just use our first choice */
1285 nmp
->nm_auth
= nmp
->nm_sec
.flavors
[0];
1288 for (o
=0; !found
&& (o
< nmp
->nm_sec
.count
); o
++)
1289 for (s
=0; !found
&& (s
< nmp
->nm_servsec
.count
); s
++)
1290 if (nmp
->nm_sec
.flavors
[o
] == nmp
->nm_servsec
.flavors
[s
]) {
1291 nmp
->nm_auth
= nmp
->nm_sec
.flavors
[o
];
1295 /* Choose the first one we support from the server's list. */
1296 if (!nmp
->nm_servsec
.count
) {
1297 nmp
->nm_auth
= RPCAUTH_SYS
;
1300 for (s
=0; s
< nmp
->nm_servsec
.count
; s
++)
1301 switch (nmp
->nm_servsec
.flavors
[s
]) {
1303 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */
1304 if (found
&& (nmp
->nm_auth
== RPCAUTH_NONE
))
1311 nmp
->nm_auth
= nmp
->nm_servsec
.flavors
[s
];
1317 error
= !found
? EAUTH
: 0;
1319 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
1322 nfs_socket_search_update_error(&nss
, error
);
1325 nfs_socket_destroy(nso
);
1329 FREE(nmp
->nm_fh
, M_TEMP
);
1332 NFS_BITMAP_SET(nmp
->nm_flags
, NFS_MFLAG_CALLUMNT
);
1335 /* put the real upcall in place */
1336 upcall
= (nso
->nso_sotype
== SOCK_STREAM
) ? nfs_tcp_rcv
: nfs_udp_rcv
;
1337 error
= sock_setupcall(nso
->nso_so
, upcall
, nmp
);
1339 nfs_socket_search_update_error(&nss
, error
);
1340 nfs_socket_destroy(nso
);
1344 if (!(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
)) {
1345 /* set mntfromname to this location */
1346 if (!NM_OMATTR_GIVEN(nmp
, MNTFROM
))
1347 nfs_location_mntfromname(&nmp
->nm_locations
, nso
->nso_location
,
1348 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
,
1349 sizeof(vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
), 0);
1350 /* some negotiated values need to remain unchanged for the life of the mount */
1351 if (!nmp
->nm_sotype
)
1352 nmp
->nm_sotype
= nso
->nso_sotype
;
1353 if (!nmp
->nm_vers
) {
1354 nmp
->nm_vers
= nfsvers
;
1355 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
1356 if ((nfsvers
>= NFS_VER4
) && !NFS_BITMAP_ISSET(nmp
->nm_mattrs
, NFS_MATTR_NFS_PORT
)) {
1357 if (nso
->nso_saddr
->sa_family
== AF_INET
)
1358 port
= ((struct sockaddr_in
*)nso
->nso_saddr
)->sin_port
= htons(port
);
1359 else if (nso
->nso_saddr
->sa_family
== AF_INET6
)
1360 port
= ((struct sockaddr_in6
*)nso
->nso_saddr
)->sin6_port
= htons(port
);
1363 if (port
== NFS_PORT
)
1364 nmp
->nm_nfsport
= NFS_PORT
;
1367 /* do some version-specific pre-mount set up */
1368 if (nmp
->nm_vers
>= NFS_VER4
) {
1370 nmp
->nm_mounttime
= ((uint64_t)now
.tv_sec
<< 32) | now
.tv_usec
;
1371 if (!NMFLAG(nmp
, NOCALLBACK
))
1372 nfs4_mount_callback_setup(nmp
);
1376 /* Initialize NFS socket state variables */
1377 lck_mtx_lock(&nmp
->nm_lock
);
1378 nmp
->nm_srtt
[0] = nmp
->nm_srtt
[1] = nmp
->nm_srtt
[2] =
1379 nmp
->nm_srtt
[3] = (NFS_TIMEO
<< 3);
1380 nmp
->nm_sdrtt
[0] = nmp
->nm_sdrtt
[1] = nmp
->nm_sdrtt
[2] =
1381 nmp
->nm_sdrtt
[3] = 0;
1382 if (nso
->nso_sotype
== SOCK_DGRAM
) {
1383 nmp
->nm_cwnd
= NFS_MAXCWND
/ 2; /* Initial send window */
1385 } else if (nso
->nso_sotype
== SOCK_STREAM
) {
1386 nmp
->nm_timeouts
= 0;
1388 nmp
->nm_sockflags
&= ~NMSOCK_CONNECTING
;
1389 nmp
->nm_sockflags
|= NMSOCK_SETUP
;
1390 /* move the socket to the mount structure */
1392 oldsaddr
= nmp
->nm_saddr
;
1393 nmp
->nm_saddr
= nso
->nso_saddr
;
1394 lck_mtx_unlock(&nmp
->nm_lock
);
1395 error
= nfs_connect_setup(nmp
);
1396 lck_mtx_lock(&nmp
->nm_lock
);
1397 nmp
->nm_sockflags
&= ~NMSOCK_SETUP
;
1399 nmp
->nm_sockflags
|= NMSOCK_READY
;
1400 wakeup(&nmp
->nm_sockflags
);
1403 NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n",
1404 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, nso
, error
));
1405 nfs_socket_search_update_error(&nss
, error
);
1406 nmp
->nm_saddr
= oldsaddr
;
1407 if (!(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
)) {
1408 /* undo settings made prior to setup */
1409 if (!NFS_BITMAP_ISSET(nmp
->nm_mattrs
, NFS_MATTR_SOCKET_TYPE
))
1411 if (!NFS_BITMAP_ISSET(nmp
->nm_mattrs
, NFS_MATTR_NFS_VERSION
)) {
1412 if (nmp
->nm_vers
>= NFS_VER4
) {
1413 if (!NFS_BITMAP_ISSET(nmp
->nm_mattrs
, NFS_MATTR_NFS_PORT
))
1414 nmp
->nm_nfsport
= 0;
1416 nfs4_mount_callback_shutdown(nmp
);
1417 if (IS_VALID_CRED(nmp
->nm_mcred
))
1418 kauth_cred_unref(&nmp
->nm_mcred
);
1419 bzero(&nmp
->nm_un
, sizeof(nmp
->nm_un
));
1424 lck_mtx_unlock(&nmp
->nm_lock
);
1426 nfs_socket_destroy(nso
);
1430 /* update current location */
1431 if ((nmp
->nm_locations
.nl_current
.nli_flags
& NLI_VALID
) &&
1432 (nmp
->nm_locations
.nl_current
.nli_serv
!= nso
->nso_location
.nli_serv
)) {
1433 /* server has changed, we should initiate failover/recovery */
1436 nmp
->nm_locations
.nl_current
= nso
->nso_location
;
1437 nmp
->nm_locations
.nl_current
.nli_flags
|= NLI_VALID
;
1439 if (!(nmp
->nm_sockflags
& NMSOCK_HASCONNECTED
)) {
1440 /* We have now successfully connected... make a note of it. */
1441 nmp
->nm_sockflags
|= NMSOCK_HASCONNECTED
;
1444 lck_mtx_unlock(&nmp
->nm_lock
);
1446 FREE(oldsaddr
, M_SONAME
);
1448 if (nss
.nss_flags
& NSS_WARNED
)
1449 log(LOG_INFO
, "nfs_connect: socket connect completed for %s\n",
1450 vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
);
1453 nfs_socket_search_cleanup(&nss
);
1457 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
1458 NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
));
/*
 * nfs_connect_setup(nmp): version-specific setup on a newly connected mount.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, local declarations, the return
 * statement) are absent, so exact nesting cannot be confirmed from here.
 *
 * What the surviving lines show:
 *  - the only visible work is guarded by nmp->nm_vers >= NFS_VER4;
 *  - when NFSSTA_CLIENTID is set in nm_state, nfs4_renew(nmp, R_SETUP) is
 *    attempted first;
 *  - five specific NFSERR_* results of that renew are passed to
 *    nfs_need_recover() while holding nm_lock;
 *  - nfs4_setclientid(nmp) is called and its result is stored in error.
 * Presumably error is the return value -- TODO confirm against the full file.
 */
1463 /* setup & confirm socket connection is functional */
1465 nfs_connect_setup(struct nfsmount
*nmp
)
1469 if (nmp
->nm_vers
>= NFS_VER4
) {
1470 if (nmp
->nm_state
& NFSSTA_CLIENTID
) {
1471 /* first, try to renew our current state */
1472 error
= nfs4_renew(nmp
, R_SETUP
);
/* These renew results are the ones handed to nfs_need_recover(). */
1473 if ((error
== NFSERR_ADMIN_REVOKED
) ||
1474 (error
== NFSERR_CB_PATH_DOWN
) ||
1475 (error
== NFSERR_EXPIRED
) ||
1476 (error
== NFSERR_LEASE_MOVED
) ||
1477 (error
== NFSERR_STALE_CLIENTID
)) {
1478 lck_mtx_lock(&nmp
->nm_lock
);
1479 nfs_need_recover(nmp
, error
);
1480 lck_mtx_unlock(&nmp
->nm_lock
);
/*
 * NOTE(review): original lines 1481-1482 are missing here; presumably they
 * close the two inner if blocks so that the call below runs for every NFSv4
 * mount -- TODO confirm. The renew result is overwritten by this assignment.
 */
1483 error
= nfs4_setclientid(nmp
);
/*
 * nfs_reconnect(nmp): drop the current socket and retry nfs_connect() until
 * it succeeds or the attempt is abandoned, then mark outstanding requests
 * for resend.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, some declarations, return/break
 * statements, comment delimiters) are absent. The bodies of several if
 * statements below are therefore not visible; comments describe only what
 * the surviving lines establish.
 */
1489 * NFS socket reconnect routine:
1490 * Called when a connection is broken.
1491 * - disconnect the old socket
1492 * - nfs_connect() again
1493 * - set R_MUSTRESEND for all outstanding requests on mount point
1494 * If this fails the mount point is DEAD!
1497 nfs_reconnect(struct nfsmount
*nmp
)
1501 thread_t thd
= current_thread();
1502 int error
, wentdown
= 0, verbose
= 1;
/*
 * lastmsg is backdated by (nm_tprintf_delay - nm_tprintf_initial_delay), so
 * the first message check below passes once nm_tprintf_initial_delay
 * seconds have elapsed rather than a full nm_tprintf_delay.
 * NOTE(review): the code that fills in now is on a missing line.
 */
1507 lastmsg
= now
.tv_sec
- (nmp
->nm_tprintf_delay
- nmp
->nm_tprintf_initial_delay
);
1509 nfs_disconnect(nmp
);
/* Connect timeout passed to nfs_connect(): 8 when nfs_is_squishy() is true, else 30. */
1512 lck_mtx_lock(&nmp
->nm_lock
);
1513 timeo
= nfs_is_squishy(nmp
) ? 8 : 30;
1514 lck_mtx_unlock(&nmp
->nm_lock
);
/* Retry loop: runs for as long as nfs_connect() returns a nonzero error. */
1516 while ((error
= nfs_connect(nmp
, verbose
, timeo
))) {
1518 nfs_disconnect(nmp
);
/* NOTE(review): the action taken for EINTR/ERESTART is on a missing line. */
1519 if ((error
== EINTR
) || (error
== ERESTART
))
/* Rate-limit the nfs_down() notification to one per nm_tprintf_delay seconds. */
1524 if ((lastmsg
+ nmp
->nm_tprintf_delay
) < now
.tv_sec
) {
1525 lastmsg
= now
.tv_sec
;
1526 nfs_down(nmp
, thd
, error
, NFSSTA_TIMEO
, "can not connect");
1529 lck_mtx_lock(&nmp
->nm_lock
);
1530 if (!(nmp
->nm_state
& NFSSTA_MOUNTED
)) {
1531 /* we're not yet completely mounted and */
1532 /* we can't reconnect, so we fail */
1533 lck_mtx_unlock(&nmp
->nm_lock
);
/* nfs_mount_check_dead_timeout() is called with nm_lock held here. */
1536 nfs_mount_check_dead_timeout(nmp
);
1537 if ((error
= nfs_sigintr(nmp
, NULL
, thd
, 1))) {
1538 lck_mtx_unlock(&nmp
->nm_lock
);
1541 lck_mtx_unlock(&nmp
->nm_lock
);
/* Pause between attempts, then check again for an interrupt. */
1542 tsleep(&lbolt
, PSOCK
, "nfs_reconnect_delay", 0);
1543 if ((error
= nfs_sigintr(nmp
, NULL
, thd
, 0)))
/* NOTE(review): any condition guarding this nfs_up() call is on a missing line. */
1548 nfs_up(nmp
, thd
, NFSSTA_TIMEO
, "connected");
1551 * Loop through outstanding request list and mark all requests
1552 * as needing a resend. (Though nfs_need_reconnect() probably
1553 * marked them all already.)
1555 lck_mtx_lock(nfs_request_mutex
);
1556 TAILQ_FOREACH(rq
, &nfs_reqq
, r_chain
) {
1557 if (rq
->r_nmp
== nmp
) {
1558 lck_mtx_lock(&rq
->r_mtx
);
/* Only requests with no error, no reply mbuf and not already flagged are touched. */
1559 if (!rq
->r_error
&& !rq
->r_nmrep
.nmc_mhead
&& !(rq
->r_flags
& R_MUSTRESEND
)) {
1560 rq
->r_flags
|= R_MUSTRESEND
;
/* NOTE(review): original lines 1561-1562 are missing from this listing. */
/* Requests with R_ASYNC set but neither R_ASYNCWAIT nor R_SENDING are passed to nfs_asyncio_resend(). */
1563 if ((rq
->r_flags
& (R_ASYNC
|R_ASYNCWAIT
|R_SENDING
)) == R_ASYNC
)
1564 nfs_asyncio_resend(rq
);
1566 lck_mtx_unlock(&rq
->r_mtx
);
1569 lck_mtx_unlock(nfs_request_mutex
);
/*
 * nfs_disconnect(nmp): wait for in-progress send/poke activity, then destroy
 * the socket attached to the mount.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, labels/gotos, the assignment of nso,
 * comment delimiters) are absent. Comments describe only the surviving lines.
 *
 * Visible steps, starting with nm_lock held:
 *  - if NFSSTA_SENDING is set, flag NFSSTA_WANTSND and msleep on nm_state
 *    using the 1 second timespec ts;
 *  - if NMSOCK_POKE is set, msleep on nm_sockflags with the same timespec;
 *  - set NMSOCK_DISCONNECTING and clear NMSOCK_READY;
 *  - drop nm_lock around nfs_socket_destroy(nso), retake it to clear
 *    NMSOCK_DISCONNECTING.
 */
1574 * NFS disconnect. Clean up and unlink.
1577 nfs_disconnect(struct nfsmount
*nmp
)
1579 struct nfs_socket
*nso
;
1581 lck_mtx_lock(&nmp
->nm_lock
);
1584 struct timespec ts
= { 1, 0 };
/* NOTE(review): what follows each msleep (presumably a retry) is on missing lines. */
1585 if (nmp
->nm_state
& NFSSTA_SENDING
) { /* wait for sending to complete */
1586 nmp
->nm_state
|= NFSSTA_WANTSND
;
1587 msleep(&nmp
->nm_state
, &nmp
->nm_lock
, PZERO
-1, "nfswaitsending", &ts
);
1590 if (nmp
->nm_sockflags
& NMSOCK_POKE
) { /* wait for poking to complete */
1591 msleep(&nmp
->nm_sockflags
, &nmp
->nm_lock
, PZERO
-1, "nfswaitpoke", &ts
);
1594 nmp
->nm_sockflags
|= NMSOCK_DISCONNECTING
;
1595 nmp
->nm_sockflags
&= ~NMSOCK_READY
;
/*
 * If the socket and the mount hold the same sockaddr pointer, the socket
 * copy is cleared before the destroy; presumably so nfs_socket_destroy()
 * does not release an address the mount still references -- TODO confirm.
 */
1598 if (nso
->nso_saddr
== nmp
->nm_saddr
)
1599 nso
->nso_saddr
= NULL
;
1600 lck_mtx_unlock(&nmp
->nm_lock
);
1601 nfs_socket_destroy(nso
);
1602 lck_mtx_lock(&nmp
->nm_lock
);
1603 nmp
->nm_sockflags
&= ~NMSOCK_DISCONNECTING
;
1604 lck_mtx_unlock(&nmp
->nm_lock
);
/* NOTE(review): this unlock presumably belongs to the branch taken when there is no socket -- the else line is missing. */
1606 lck_mtx_unlock(&nmp
->nm_lock
);
/*
 * nfs_need_reconnect(nmp): flag a mount as no longer ready and mark its
 * outstanding requests for resend.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, the declaration of rq, comment
 * delimiters, original lines 1632-1633) are absent.
 *
 * Visible steps:
 *  - under nm_lock, clear NMSOCK_READY and NMSOCK_SETUP in nm_sockflags;
 *  - under nfs_request_mutex, walk nfs_reqq and, for each request whose
 *    r_nmp is this mount, take r_mtx and set R_MUSTRESEND when the request
 *    has no error, no reply mbuf and is not already flagged.
 */
1611 * mark an NFS mount as needing a reconnect/resends.
1614 nfs_need_reconnect(struct nfsmount
*nmp
)
1618 lck_mtx_lock(&nmp
->nm_lock
);
1619 nmp
->nm_sockflags
&= ~(NMSOCK_READY
|NMSOCK_SETUP
);
1620 lck_mtx_unlock(&nmp
->nm_lock
);
1623 * Loop through outstanding request list and
1624 * mark all requests as needing a resend.
1626 lck_mtx_lock(nfs_request_mutex
);
1627 TAILQ_FOREACH(rq
, &nfs_reqq
, r_chain
) {
1628 if (rq
->r_nmp
== nmp
) {
1629 lck_mtx_lock(&rq
->r_mtx
);
1630 if (!rq
->r_error
&& !rq
->r_nmrep
.nmc_mhead
&& !(rq
->r_flags
& R_MUSTRESEND
)) {
1631 rq
->r_flags
|= R_MUSTRESEND
;
/* Requests with R_ASYNC set but neither R_ASYNCWAIT nor R_SENDING are passed to nfs_asyncio_resend(). */
1634 if ((rq
->r_flags
& (R_ASYNC
|R_ASYNCWAIT
|R_SENDING
)) == R_ASYNC
)
1635 nfs_asyncio_resend(rq
);
1637 lck_mtx_unlock(&rq
->r_mtx
);
1640 lck_mtx_unlock(nfs_request_mutex
);
/*
 * nfs_mount_sock_thread(arg, wr): per-mount worker thread. arg is the
 * struct nfsmount pointer; wr is unused.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and many
 * original lines (return type, braces, else/break/continue/goto lines, some
 * declarations and statements, comment delimiters) are absent. Branch
 * structure in the resend section in particular cannot be confirmed from
 * here; comments describe only what the surviving lines establish.
 *
 * Phases visible in the main loop, in order:
 *  1. reconnect via nfs_reconnect() when the socket is not ready;
 *  2. state recovery when NFSSTA_RECOVER is set (the call itself is on a
 *     missing line);
 *  3. NFSv4 delegation returns from nm_dreturnq;
 *  4. resending requests queued on nm_resendq;
 *  5. dead-timeout check;
 *  6. attribute refresh of nodes on nm_monlist;
 *  7. sleep on nm_sockthd.
 * After the loop: optional unmount RPC, then the thread clears nm_sockthd
 * and terminates itself.
 * nm_lock is held across the loop and dropped around each blocking call.
 */
1645 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
1648 nfs_mount_sock_thread(void *arg
, __unused wait_result_t wr
)
1650 struct nfsmount
*nmp
= arg
;
1651 struct timespec ts
= { 30, 0 };
1652 thread_t thd
= current_thread();
1655 int error
, dofinish
;
1658 lck_mtx_lock(&nmp
->nm_lock
);
/*
 * Main loop condition: keep going while the socket is not ready, or there
 * are queued resends, monitored nodes, a running dead timer, pending
 * recovery, or (NFSv4) queued delegation returns.
 */
1660 while (!(nmp
->nm_sockflags
& NMSOCK_READY
) ||
1661 !TAILQ_EMPTY(&nmp
->nm_resendq
) ||
1662 !LIST_EMPTY(&nmp
->nm_monlist
) ||
1663 nmp
->nm_deadto_start
||
1664 (nmp
->nm_state
& NFSSTA_RECOVER
) ||
1665 ((nmp
->nm_vers
>= NFS_VER4
) && !TAILQ_EMPTY(&nmp
->nm_dreturnq
)))
/* NOTE(review): the statement controlled by this unmount check is on a missing line. */
1667 if (nmp
->nm_sockflags
& NMSOCK_UNMOUNT
)
1669 /* do reconnect, if necessary */
1670 if (!(nmp
->nm_sockflags
& NMSOCK_READY
) && !(nmp
->nm_state
& NFSSTA_FORCE
)) {
/* Record when this reconnect effort began; reset to 0 once nfs_reconnect() succeeds. */
1671 if (nmp
->nm_reconnect_start
<= 0) {
1673 nmp
->nm_reconnect_start
= now
.tv_sec
;
1675 lck_mtx_unlock(&nmp
->nm_lock
);
1676 NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
));
1677 if (nfs_reconnect(nmp
) == 0)
1678 nmp
->nm_reconnect_start
= 0;
1679 lck_mtx_lock(&nmp
->nm_lock
);
1681 if ((nmp
->nm_sockflags
& NMSOCK_READY
) &&
1682 (nmp
->nm_state
& NFSSTA_RECOVER
) &&
1683 !(nmp
->nm_sockflags
& NMSOCK_UNMOUNT
) &&
1684 !(nmp
->nm_state
& NFSSTA_FORCE
)) {
1685 /* perform state recovery */
1686 lck_mtx_unlock(&nmp
->nm_lock
);
/* NOTE(review): the recovery call (original line 1687) is missing from this listing. */
1688 lck_mtx_lock(&nmp
->nm_lock
);
1690 /* handle NFSv4 delegation returns */
1691 while ((nmp
->nm_vers
>= NFS_VER4
) && !(nmp
->nm_state
& NFSSTA_FORCE
) &&
1692 (nmp
->nm_sockflags
& NMSOCK_READY
) && !(nmp
->nm_state
& NFSSTA_RECOVER
) &&
1693 ((np
= TAILQ_FIRST(&nmp
->nm_dreturnq
)))) {
1694 lck_mtx_unlock(&nmp
->nm_lock
);
1695 nfs4_delegation_return(np
, R_RECOVER
, thd
, nmp
->nm_mcred
);
1696 lck_mtx_lock(&nmp
->nm_lock
);
1698 /* do resends, if necessary/possible */
/* Resends proceed when the socket is ready and no recovery is pending, or unconditionally when NFSSTA_FORCE is set. */
1699 while ((((nmp
->nm_sockflags
& NMSOCK_READY
) && !(nmp
->nm_state
& NFSSTA_RECOVER
)) || (nmp
->nm_state
& NFSSTA_FORCE
)) &&
1700 ((req
= TAILQ_FIRST(&nmp
->nm_resendq
)))) {
1701 if (req
->r_resendtime
)
/* Skip over requests whose r_resendtime is still in the future (not applied when NFSSTA_FORCE is set). */
1703 while (req
&& !(nmp
->nm_state
& NFSSTA_FORCE
) && req
->r_resendtime
&& (now
.tv_sec
< req
->r_resendtime
))
1704 req
= TAILQ_NEXT(req
, r_rchain
);
/* Take the request off the resend queue and mark its link as not on any list. */
1707 TAILQ_REMOVE(&nmp
->nm_resendq
, req
, r_rchain
);
1708 req
->r_rchain
.tqe_next
= NFSREQNOLIST
;
1709 lck_mtx_unlock(&nmp
->nm_lock
);
1710 lck_mtx_lock(&req
->r_mtx
);
/* A request that already has an error or a reply is not resent; only R_RESENDQ is cleared. */
1711 if (req
->r_error
|| req
->r_nmrep
.nmc_mhead
) {
1712 dofinish
= req
->r_callback
.rcb_func
&& !(req
->r_flags
& R_WAITSENT
);
1713 req
->r_flags
&= ~R_RESENDQ
;
1715 lck_mtx_unlock(&req
->r_mtx
);
1717 nfs_asyncio_finish(req
);
1718 lck_mtx_lock(&nmp
->nm_lock
);
/* Restart path: taken for R_RESTART requests and for requests using GSS. */
1721 if ((req
->r_flags
& R_RESTART
) || nfs_request_using_gss(req
)) {
1722 req
->r_flags
&= ~R_RESTART
;
1723 req
->r_resendtime
= 0;
1724 lck_mtx_unlock(&req
->r_mtx
);
1725 /* async RPCs on GSS mounts need to be rebuilt and resent. */
1726 nfs_reqdequeue(req
);
1727 if (nfs_request_using_gss(req
)) {
1728 nfs_gss_clnt_rpcdone(req
);
1729 error
= nfs_gss_clnt_args_restore(req
);
1730 if (error
== ENEEDAUTH
)
1733 NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
1734 nfs_request_using_gss(req
) ? " gss" : "", req
->r_procnum
, req
->r_xid
,
1735 req
->r_flags
, req
->r_rtt
));
/* NOTE(review): the conditions chaining the next four assignments to error are on missing lines. */
1736 error
= !req
->r_nmp
? ENXIO
: 0; /* unmounted? */
1738 error
= nfs_sigintr(nmp
, req
, req
->r_thread
, 0);
1740 error
= nfs_request_add_header(req
);
1742 error
= nfs_request_send(req
, 0);
1743 lck_mtx_lock(&req
->r_mtx
);
1744 if (req
->r_flags
& R_RESENDQ
)
1745 req
->r_flags
&= ~R_RESENDQ
;
1747 req
->r_error
= error
;
1749 dofinish
= error
&& req
->r_callback
.rcb_func
&& !(req
->r_flags
& R_WAITSENT
);
1750 lck_mtx_unlock(&req
->r_mtx
);
1752 nfs_asyncio_finish(req
);
1753 lck_mtx_lock(&nmp
->nm_lock
);
/* Plain resend path: the request is marked R_SENDING and handed to nfs_send(). */
1757 NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
1758 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
));
1759 error
= !req
->r_nmp
? ENXIO
: 0; /* unmounted? */
1761 error
= nfs_sigintr(nmp
, req
, req
->r_thread
, 0);
1763 req
->r_flags
|= R_SENDING
;
1764 lck_mtx_unlock(&req
->r_mtx
);
1765 error
= nfs_send(req
, 0);
1766 lck_mtx_lock(&req
->r_mtx
);
1768 if (req
->r_flags
& R_RESENDQ
)
1769 req
->r_flags
&= ~R_RESENDQ
;
1771 lck_mtx_unlock(&req
->r_mtx
);
1772 lck_mtx_lock(&nmp
->nm_lock
);
/* NOTE(review): presumably the failure tail of the resend path; the guarding condition is on a missing line. */
1776 req
->r_error
= error
;
1777 if (req
->r_flags
& R_RESENDQ
)
1778 req
->r_flags
&= ~R_RESENDQ
;
1780 dofinish
= req
->r_callback
.rcb_func
&& !(req
->r_flags
& R_WAITSENT
);
1781 lck_mtx_unlock(&req
->r_mtx
);
1783 nfs_asyncio_finish(req
);
1784 lck_mtx_lock(&nmp
->nm_lock
);
1786 if (nmp
->nm_deadto_start
)
1787 nfs_mount_check_dead_timeout(nmp
);
/* NOTE(review): the statement controlled by this FORCE/DEAD check is on a missing line. */
1788 if (nmp
->nm_state
& (NFSSTA_FORCE
|NFSSTA_DEAD
))
1790 /* check monitored nodes, if necessary/possible */
1791 if (!LIST_EMPTY(&nmp
->nm_monlist
)) {
1792 nmp
->nm_state
|= NFSSTA_MONITOR_SCAN
;
1793 LIST_FOREACH(np
, &nmp
->nm_monlist
, n_monlink
) {
1794 if (!(nmp
->nm_sockflags
& NMSOCK_READY
) || (nmp
->nm_state
& (NFSSTA_RECOVER
|NFSSTA_UNMOUNTING
|NFSSTA_FORCE
)))
/* NMMONSCANINPROG brackets the getattr, which runs with nm_lock dropped. */
1796 np
->n_mflag
|= NMMONSCANINPROG
;
1797 lck_mtx_unlock(&nmp
->nm_lock
);
1798 error
= nfs_getattr(np
, NULL
, vfs_context_kernel(), (NGA_UNCACHED
|NGA_MONITOR
));
1799 if (!error
&& ISSET(np
->n_flag
, NUPDATESIZE
)) /* update quickly to avoid multiple events */
1800 nfs_data_update_size(np
, 0);
1801 lck_mtx_lock(&nmp
->nm_lock
);
1802 np
->n_mflag
&= ~NMMONSCANINPROG
;
/* Wake anyone who flagged NMMONSCANWANT while the scan of this node was in progress. */
1803 if (np
->n_mflag
& NMMONSCANWANT
) {
1804 np
->n_mflag
&= ~NMMONSCANWANT
;
1805 wakeup(&np
->n_mflag
);
1807 if (error
|| !(nmp
->nm_sockflags
& NMSOCK_READY
) || (nmp
->nm_state
& (NFSSTA_RECOVER
|NFSSTA_UNMOUNTING
|NFSSTA_FORCE
)))
1810 nmp
->nm_state
&= ~NFSSTA_MONITOR_SCAN
;
1811 if (nmp
->nm_state
& NFSSTA_UNMOUNTING
)
1812 wakeup(&nmp
->nm_state
); /* let unmounting thread know scan is done */
1814 if ((nmp
->nm_sockflags
& NMSOCK_READY
) || (nmp
->nm_state
& (NFSSTA_RECOVER
|NFSSTA_UNMOUNTING
))) {
/* NOTE(review): the statement selected by this pending-work check is on missing lines. */
1815 if (nmp
->nm_deadto_start
|| !TAILQ_EMPTY(&nmp
->nm_resendq
) ||
1816 (nmp
->nm_state
& NFSSTA_RECOVER
))
/* Sleep on nm_sockthd (nm_lock is the msleep mutex) using the timespec ts. */
1820 msleep(&nmp
->nm_sockthd
, &nmp
->nm_lock
, PSOCK
, "nfssockthread", &ts
);
1824 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */
1825 if ((nmp
->nm_sockflags
& NMSOCK_UNMOUNT
) &&
1826 (nmp
->nm_state
& NFSSTA_MOUNTED
) && NMFLAG(nmp
, CALLUMNT
) &&
1827 (nmp
->nm_vers
< NFS_VER4
) && !(nmp
->nm_state
& (NFSSTA_FORCE
|NFSSTA_DEAD
))) {
1828 lck_mtx_unlock(&nmp
->nm_lock
);
/* Third argument is 6 when the socket is ready and 2 otherwise; presumably a timeout -- TODO confirm. */
1829 nfs3_umount_rpc(nmp
, vfs_context_kernel(),
1830 (nmp
->nm_sockflags
& NMSOCK_READY
) ? 6 : 2);
1831 lck_mtx_lock(&nmp
->nm_lock
);
/* Exit: clear nm_sockthd if it still names this thread, wake waiters, terminate. */
1834 if (nmp
->nm_sockthd
== thd
)
1835 nmp
->nm_sockthd
= NULL
;
1836 lck_mtx_unlock(&nmp
->nm_lock
);
1837 wakeup(&nmp
->nm_sockthd
);
1838 thread_terminate(thd
);
/*
 * nfs_mount_sock_thread_wake(nmp): make sure the socket thread of a mount is
 * running and awake.
 *
 * NOTE(review): this listing is an extraction artifact (embedded original
 * line numbers, split tokens, missing return type and braces).
 *
 * If nm_sockthd is already set, the thread is woken through wakeup() on
 * &nm_sockthd. Otherwise kernel_thread_start() is asked to start
 * nfs_mount_sock_thread with nmp as its argument and to store the new thread
 * in nm_sockthd; on KERN_SUCCESS thread_deallocate() is called on that
 * thread, presumably to drop the reference kernel_thread_start() returned --
 * TODO confirm.
 * NOTE(review): no locking is visible in this function; whether callers are
 * expected to hold nm_lock cannot be determined from here.
 */
1841 /* start or wake a mount's socket thread */
1843 nfs_mount_sock_thread_wake(struct nfsmount
*nmp
)
1845 if (nmp
->nm_sockthd
)
1846 wakeup(&nmp
->nm_sockthd
);
1847 else if (kernel_thread_start(nfs_mount_sock_thread
, nmp
, &nmp
->nm_sockthd
) == KERN_SUCCESS
)
1848 thread_deallocate(nmp
->nm_sockthd
);
/*
 * nfs_mount_check_dead_timeout(nmp): mark the mount NFSSTA_DEAD once the
 * dead timer has run for nm_curdeadtimeout seconds.
 *
 * NOTE(review): this listing is an extraction artifact (embedded original
 * line numbers, split tokens, missing return type, braces, return statements
 * and the code that fills in now). The four if statements below have their
 * bodies on missing lines; presumably each returns early -- TODO confirm.
 *
 * Visible checks, in order: timer not started (nm_deadto_start == 0), mount
 * already dead, nm_curdeadtimeout <= 0, elapsed time still below
 * nm_curdeadtimeout. When the function proceeds past them it prints a
 * message, sets NFSSTA_DEAD and signals VQ_DEAD through vfs_event_signal().
 */
1852 * Check if we should mark the mount dead because the
1853 * unresponsive mount has reached the dead timeout.
1854 * (must be called with nmp locked)
1857 nfs_mount_check_dead_timeout(struct nfsmount
*nmp
)
1861 if (nmp
->nm_deadto_start
== 0)
1863 if (nmp
->nm_state
& NFSSTA_DEAD
)
/* Return value unused here; presumably called for its effect on nm_curdeadtimeout -- TODO confirm. */
1865 nfs_is_squishy(nmp
);
1866 if (nmp
->nm_curdeadtimeout
<= 0)
1869 if ((now
.tv_sec
- nmp
->nm_deadto_start
) < nmp
->nm_curdeadtimeout
)
/* The message is prefixed with the word squished when the current timeout differs from nm_deadtimeout. */
1871 printf("nfs server %s: %sdead\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
,
1872 (nmp
->nm_curdeadtimeout
!= nmp
->nm_deadtimeout
) ? "squished " : "");
1873 nmp
->nm_state
|= NFSSTA_DEAD
;
1874 vfs_event_signal(&vfs_statfs(nmp
->nm_mountp
)->f_fsid
, VQ_DEAD
, 0);
/*
 * struct nfs_callback_socket: per-connection state for one callback socket.
 * Instances are linked through ncbs_link (a TAILQ entry).
 *
 * NOTE(review): this listing is an extraction artifact (embedded original
 * line numbers, split tokens, missing braces and comment delimiters).
 *
 * The NCBSOCK_* values below are bit flags for ncbs_flags. NCBSOCK_DEAD is
 * tested by nfs4_callback_timer() in this file; uses of the two UPCALL flags
 * are not visible in this part of the file.
 */
1878 * NFS callback channel socket state
1880 struct nfs_callback_socket
1882 TAILQ_ENTRY(nfs_callback_socket
) ncbs_link
;
1883 socket_t ncbs_so
; /* the socket */
1884 struct sockaddr_storage ncbs_saddr
; /* socket address */
1885 struct nfs_rpc_record_state ncbs_rrs
; /* RPC record parsing state */
1886 time_t ncbs_stamp
; /* last accessed at */
1887 uint32_t ncbs_flags
; /* see below */
1889 #define NCBSOCK_UPCALL 0x0001
1890 #define NCBSOCK_UPCALLWANT 0x0002
1891 #define NCBSOCK_DEAD 0x0004
/*
 * File-scope state of the NFSv4 callback channel.
 *
 * NOTE(review): this listing is an extraction artifact (embedded original
 * line numbers, split tokens, missing comment delimiters).
 *
 *  nfs4_cb_so / nfs4_cb_so6      listening sockets (IPv4 / IPv6)
 *  nfs4_cb_port / nfs4_cb_port6  ports those sockets ended up bound to
 *  nfs4_cb_id                    counter used to hand out nm_cbid values
 *  nfs4_cb_so_usecount           number of mounts using the channel
 *  nfs4_cb_socks                 list of accepted callback sockets
 *  nfs4_cb_mounts                list of mounts registered for callbacks
 * The setup and shutdown routines in this file access these while holding
 * nfs_global_mutex.
 */
1894 * NFS callback channel state
1896 * One listening socket for accepting socket connections from servers and
1897 * a list of connected sockets to handle callback requests on.
1898 * Mounts registered with the callback channel are assigned IDs and
1899 * put on a list so that the callback request handling code can match
1900 * the requests up with mounts.
1902 socket_t nfs4_cb_so
= NULL
;
1903 socket_t nfs4_cb_so6
= NULL
;
1904 in_port_t nfs4_cb_port
= 0;
1905 in_port_t nfs4_cb_port6
= 0;
1906 uint32_t nfs4_cb_id
= 0;
1907 uint32_t nfs4_cb_so_usecount
= 0;
1908 TAILQ_HEAD(nfs4_cb_sock_list
,nfs_callback_socket
) nfs4_cb_socks
;
1909 TAILQ_HEAD(nfs4_cb_mount_list
,nfsmount
) nfs4_cb_mounts
;
/* Forward declaration; the definition is outside the visible part of the file. */
1911 int nfs4_cb_handler(struct nfs_callback_socket
*, mbuf_t
);
/*
 * nfs4_mount_callback_setup(nmp): register a mount with the callback channel
 * and create the listening sockets.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and many
 * original lines (return type, braces, the error checks guarding each log()
 * call, goto/label lines, assignments of so, so6, on and timeo, comment
 * delimiters) are absent. Comments describe only the surviving lines.
 *
 * Visible sequence:
 *  - under nfs_global_mutex: initialise the two lists on first use, assign a
 *    nonzero nm_cbid, bump nfs4_cb_so_usecount, insert the mount at the head
 *    of nfs4_cb_mounts;
 *  - IPv4: create a TCP socket with nfs4_cb_accept as its upcall, set
 *    SO_REUSEADDR, bind to INADDR_ANY on nfs_callback_port, read back the
 *    bound port into nfs4_cb_port, listen with backlog 32, set timeouts and
 *    further socket options;
 *  - IPv6: the same steps on nfs4_cb_so6, plus IPV6_V6ONLY, with a retry of
 *    the bind through the ipv6_bind_again label;
 *  - a tail that resets both global socket pointers to NULL and shuts the
 *    sockets down; presumably the failure path -- TODO confirm.
 */
1914 * Set up the callback channel for the NFS mount.
1916 * Initializes the callback channel socket state and
1917 * assigns a callback ID to the mount.
1920 nfs4_mount_callback_setup(struct nfsmount
*nmp
)
1922 struct sockaddr_in sin
;
1923 struct sockaddr_in6 sin6
;
1925 socket_t so6
= NULL
;
1926 struct timeval timeo
;
1930 lck_mtx_lock(nfs_global_mutex
);
/* nfs4_cb_id == 0 is used as the sign that the lists have not been initialised yet. */
1931 if (nfs4_cb_id
== 0) {
1932 TAILQ_INIT(&nfs4_cb_mounts
);
1933 TAILQ_INIT(&nfs4_cb_socks
);
/* Take the next id; if the value taken is 0, take another so nm_cbid is nonzero. */
1936 nmp
->nm_cbid
= nfs4_cb_id
++;
1937 if (nmp
->nm_cbid
== 0)
1938 nmp
->nm_cbid
= nfs4_cb_id
++;
1939 nfs4_cb_so_usecount
++;
1940 TAILQ_INSERT_HEAD(&nfs4_cb_mounts
, nmp
, nm_cblink
);
/* NOTE(review): presumably an early exit when the listening sockets already exist; the condition is on a missing line. */
1943 lck_mtx_unlock(nfs_global_mutex
);
/* IPv4 listening socket. */
1948 error
= sock_socket(AF_INET
, SOCK_STREAM
, IPPROTO_TCP
, nfs4_cb_accept
, NULL
, &nfs4_cb_so
);
1950 log(LOG_INFO
, "nfs callback setup: error %d creating listening IPv4 socket\n", error
);
1955 sock_setsockopt(so
, SOL_SOCKET
, SO_REUSEADDR
, &on
, sizeof(on
));
1956 sin
.sin_len
= sizeof(struct sockaddr_in
);
1957 sin
.sin_family
= AF_INET
;
1958 sin
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
1959 sin
.sin_port
= htons(nfs_callback_port
); /* try to use specified port */
1960 error
= sock_bind(so
, (struct sockaddr
*)&sin
);
1962 log(LOG_INFO
, "nfs callback setup: error %d binding listening IPv4 socket\n", error
);
/* Read back the address to learn which port was actually bound. */
1965 error
= sock_getsockname(so
, (struct sockaddr
*)&sin
, sin
.sin_len
);
1967 log(LOG_INFO
, "nfs callback setup: error %d getting listening IPv4 socket port\n", error
);
1970 nfs4_cb_port
= ntohs(sin
.sin_port
);
1972 error
= sock_listen(so
, 32);
1974 log(LOG_INFO
, "nfs callback setup: error %d on IPv4 listen\n", error
);
1978 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
1981 error
= sock_setsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
1983 log(LOG_INFO
, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error
);
1984 error
= sock_setsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
1986 log(LOG_INFO
, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error
);
/* Results of the next three setsockopt calls are not stored. */
1987 sock_setsockopt(so
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
1988 sock_setsockopt(so
, SOL_SOCKET
, SO_NOADDRERR
, &on
, sizeof(on
));
1989 sock_setsockopt(so
, SOL_SOCKET
, SO_UPCALLCLOSEWAIT
, &on
, sizeof(on
));
/* IPv6 listening socket. */
1993 error
= sock_socket(AF_INET6
, SOCK_STREAM
, IPPROTO_TCP
, nfs4_cb_accept
, NULL
, &nfs4_cb_so6
);
1995 log(LOG_INFO
, "nfs callback setup: error %d creating listening IPv6 socket\n", error
);
2000 sock_setsockopt(so6
, SOL_SOCKET
, SO_REUSEADDR
, &on
, sizeof(on
));
2001 sock_setsockopt(so6
, IPPROTO_IPV6
, IPV6_V6ONLY
, &on
, sizeof(on
));
2002 /* try to use specified port or same port as IPv4 */
2003 port
= nfs_callback_port
? nfs_callback_port
: nfs4_cb_port
;
2005 sin6
.sin6_len
= sizeof(struct sockaddr_in6
);
2006 sin6
.sin6_family
= AF_INET6
;
2007 sin6
.sin6_addr
= in6addr_any
;
2008 sin6
.sin6_port
= htons(port
);
2009 error
= sock_bind(so6
, (struct sockaddr
*)&sin6
);
/* NOTE(review): the ipv6_bind_again label and the port reset before the goto are on missing lines. */
2011 if (port
!= nfs_callback_port
) {
2012 /* if we simply tried to match the IPv4 port, then try any port */
2014 goto ipv6_bind_again
;
2016 log(LOG_INFO
, "nfs callback setup: error %d binding listening IPv6 socket\n", error
);
2019 error
= sock_getsockname(so6
, (struct sockaddr
*)&sin6
, sin6
.sin6_len
);
2021 log(LOG_INFO
, "nfs callback setup: error %d getting listening IPv6 socket port\n", error
);
2024 nfs4_cb_port6
= ntohs(sin6
.sin6_port
);
2026 error
= sock_listen(so6
, 32);
2028 log(LOG_INFO
, "nfs callback setup: error %d on IPv6 listen\n", error
);
2032 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2035 error
= sock_setsockopt(so6
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
2037 log(LOG_INFO
, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error
);
2038 error
= sock_setsockopt(so6
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
2040 log(LOG_INFO
, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error
);
2041 sock_setsockopt(so6
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
2042 sock_setsockopt(so6
, SOL_SOCKET
, SO_NOADDRERR
, &on
, sizeof(on
));
2043 sock_setsockopt(so6
, SOL_SOCKET
, SO_UPCALLCLOSEWAIT
, &on
, sizeof(on
));
/*
 * NOTE(review): presumably the failure path (its guard is on a missing
 * line): the global socket pointers are cleared under nfs_global_mutex and
 * the local so / so6 are shut down after the mutex is released.
 */
2048 nfs4_cb_so
= nfs4_cb_so6
= NULL
;
2049 lck_mtx_unlock(nfs_global_mutex
);
2051 sock_shutdown(so
, SHUT_RDWR
);
2055 sock_shutdown(so6
, SHUT_RDWR
);
2059 lck_mtx_unlock(nfs_global_mutex
);
/*
 * nfs4_mount_callback_shutdown(nmp): unregister a mount from the callback
 * channel and, when it was the last user, tear the channel down.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, early-return lines, the assignments
 * of so and so6, any sock_close calls on them, comment delimiters) are
 * absent. Comments describe only the surviving lines.
 *
 * Visible sequence, starting under nfs_global_mutex:
 *  - remove the mount from nfs4_cb_mounts;
 *  - loop in msleep (1 second timespec) while nm_cbrefs is nonzero;
 *  - decrement nfs4_cb_so_usecount; if it is still nonzero the mutex is
 *    released (presumably followed by a return -- TODO confirm);
 *  - otherwise clear both global listening-socket pointers and move every
 *    entry of nfs4_cb_socks onto the local list cb_socks with TAILQ_CONCAT,
 *    so the remaining work runs after the mutex is released;
 *  - shut down so and so6, then for each moved callback socket: shut down,
 *    close, clean up its RPC record state and free it with M_TEMP.
 */
2064 * Shut down the callback channel for the NFS mount.
2066 * Clears the mount's callback ID and releases the mounts
2067 * reference on the callback socket. Last reference dropped
2068 * will also shut down the callback socket(s).
2071 nfs4_mount_callback_shutdown(struct nfsmount
*nmp
)
2073 struct nfs_callback_socket
*ncbsp
;
2075 struct nfs4_cb_sock_list cb_socks
;
2076 struct timespec ts
= {1,0};
2078 lck_mtx_lock(nfs_global_mutex
);
2079 TAILQ_REMOVE(&nfs4_cb_mounts
, nmp
, nm_cblink
);
2080 /* wait for any callbacks in progress to complete */
2081 while (nmp
->nm_cbrefs
)
2082 msleep(&nmp
->nm_cbrefs
, nfs_global_mutex
, PSOCK
, "cbshutwait", &ts
);
2084 if (--nfs4_cb_so_usecount
) {
2085 lck_mtx_unlock(nfs_global_mutex
);
2090 nfs4_cb_so
= nfs4_cb_so6
= NULL
;
2091 TAILQ_INIT(&cb_socks
);
2092 TAILQ_CONCAT(&cb_socks
, &nfs4_cb_socks
, ncbs_link
);
2093 lck_mtx_unlock(nfs_global_mutex
);
2095 sock_shutdown(so
, SHUT_RDWR
);
2099 sock_shutdown(so6
, SHUT_RDWR
);
2102 while ((ncbsp
= TAILQ_FIRST(&cb_socks
))) {
2103 TAILQ_REMOVE(&cb_socks
, ncbsp
, ncbs_link
);
2104 sock_shutdown(ncbsp
->ncbs_so
, SHUT_RDWR
);
2105 sock_close(ncbsp
->ncbs_so
);
2106 nfs_rpc_record_state_cleanup(&ncbsp
->ncbs_rrs
);
2107 FREE(ncbsp
, M_TEMP
);
/*
 * nfs4_callback_timer(param0, param1): periodic sweep of the accepted
 * callback sockets. Both parameters are unused.
 *
 * NOTE(review): this listing is an extraction artifact. Every line carries
 * the original file line number, tokens are split across lines, and several
 * original lines (return type, braces, return/continue/goto lines, the code
 * that fills in now, comment delimiters) are absent. Comments describe only
 * the surviving lines.
 *
 * Visible behavior, under nfs_global_mutex:
 *  - if nfs4_cb_socks is empty, clear nfs4_callback_timer_on and unlock
 *    (presumably followed by a return -- TODO confirm);
 *  - walk nfs4_cb_socks; a socket that is not NCBSOCK_DEAD and was stamped
 *    less than NFS4_CB_IDLE_MAX seconds ago is tested for first (its action,
 *    presumably skipping it, is on a missing line); otherwise the socket is
 *    unlinked, the mutex is released, and the socket is shut down, closed,
 *    cleaned up and freed;
 *  - set nfs4_callback_timer_on and re-arm the timer for
 *    NFS4_CB_TIMER_PERIOD * 1000 (presumably milliseconds -- TODO confirm).
 */
2112 * Check periodically for stale/unused nfs callback sockets
2114 #define NFS4_CB_TIMER_PERIOD 30
2115 #define NFS4_CB_IDLE_MAX 300
2117 nfs4_callback_timer(__unused
void *param0
, __unused
void *param1
)
2119 struct nfs_callback_socket
*ncbsp
, *nextncbsp
;
2123 lck_mtx_lock(nfs_global_mutex
);
2124 if (TAILQ_EMPTY(&nfs4_cb_socks
)) {
2125 nfs4_callback_timer_on
= 0;
2126 lck_mtx_unlock(nfs_global_mutex
);
2130 TAILQ_FOREACH_SAFE(ncbsp
, &nfs4_cb_socks
, ncbs_link
, nextncbsp
) {
2131 if (!(ncbsp
->ncbs_flags
& NCBSOCK_DEAD
) &&
2132 (now
.tv_sec
< (ncbsp
->ncbs_stamp
+ NFS4_CB_IDLE_MAX
)))
/*
 * NOTE(review): the mutex is released here inside the list walk; how the
 * walk resumes afterwards (original lines 2140-2141) is not visible.
 */
2134 TAILQ_REMOVE(&nfs4_cb_socks
, ncbsp
, ncbs_link
);
2135 lck_mtx_unlock(nfs_global_mutex
);
2136 sock_shutdown(ncbsp
->ncbs_so
, SHUT_RDWR
);
2137 sock_close(ncbsp
->ncbs_so
);
2138 nfs_rpc_record_state_cleanup(&ncbsp
->ncbs_rrs
);
2139 FREE(ncbsp
, M_TEMP
);
2142 nfs4_callback_timer_on
= 1;
2143 nfs_interval_timer_start(nfs4_callback_timer_call
,
2144 NFS4_CB_TIMER_PERIOD
* 1000);
2145 lck_mtx_unlock(nfs_global_mutex
);
2149 * Accept a new callback socket.
/*
 * nfs4_cb_accept: handle a new connection on one of the callback listening
 * sockets.
 *
 * so:       the socket being accepted on; compared against nfs4_cb_so and
 *           nfs4_cb_so6 (the statements those comparisons guard are not in
 *           this listing; ip is later tested against 4 and printed as IPv%d).
 * arg:      unused.
 * waitflag: unused.
 *
 * Visible sequence:
 *  - allocate and zero a struct nfs_callback_socket, size its ncbs_saddr by
 *    address family, and initialise its RPC record state;
 *  - sock_accept() with MSG_DONTWAIT, registering nfs4_cb_rcv as the upcall
 *    with the new structure as its argument; a failure is logged and the
 *    structure freed;
 *  - apply SO_RCVTIMEO, SO_SNDTIMEO, TCP_NODELAY, SO_REUSEADDR, SO_NOADDRERR
 *    and SO_UPCALLCLOSEWAIT to the new socket (timeout failures are logged);
 *  - record the socket and a timestamp, then with nfs_global_mutex held put
 *    the entry at the head of nfs4_cb_socks and look for a mount in
 *    nfs4_cb_mounts whose nm_saddr matches the peer address;
 *  - with no matching mount the entry is flagged NCBSOCK_DEAD;
 *  - make sure the cleanup timer is running, using a 500 interval instead
 *    of NFS4_CB_TIMER_PERIOD * 1000 when the socket is unwanted.
 *
 * NOTE(review): the values stored in timeo and now, the error checks around
 * several calls, and the early exits are not visible in this listing.
 */
2152 nfs4_cb_accept(socket_t so
, __unused
void *arg
, __unused
int waitflag
)
2154 socket_t newso
= NULL
;
2155 struct nfs_callback_socket
*ncbsp
;
2156 struct nfsmount
*nmp
;
2157 struct timeval timeo
, now
;
2158 int error
, on
= 1, ip
;
2160 if (so
== nfs4_cb_so
)
2162 else if (so
== nfs4_cb_so6
)
2167 /* allocate/initialize a new nfs_callback_socket */
2168 MALLOC(ncbsp
, struct nfs_callback_socket
*, sizeof(struct nfs_callback_socket
), M_TEMP
, M_WAITOK
);
2170 log(LOG_ERR
, "nfs callback accept: no memory for new socket\n");
2173 bzero(ncbsp
, sizeof(*ncbsp
));
2174 ncbsp
->ncbs_saddr
.ss_len
= (ip
== 4) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
);
2175 nfs_rpc_record_state_init(&ncbsp
->ncbs_rrs
);
2177 /* accept a new socket */
2178 error
= sock_accept(so
, (struct sockaddr
*)&ncbsp
->ncbs_saddr
,
2179 ncbsp
->ncbs_saddr
.ss_len
, MSG_DONTWAIT
,
2180 nfs4_cb_rcv
, ncbsp
, &newso
);
2182 log(LOG_INFO
, "nfs callback accept: error %d accepting IPv%d socket\n", error
, ip
);
2183 FREE(ncbsp
, M_TEMP
);
2187 /* set up the new socket */
2188 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2191 error
= sock_setsockopt(newso
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
2193 log(LOG_INFO
, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error
, ip
);
2194 error
= sock_setsockopt(newso
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
2196 log(LOG_INFO
, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error
, ip
);
/* the remaining options are set without checking the result */
2197 sock_setsockopt(newso
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
2198 sock_setsockopt(newso
, SOL_SOCKET
, SO_REUSEADDR
, &on
, sizeof(on
));
2199 sock_setsockopt(newso
, SOL_SOCKET
, SO_NOADDRERR
, &on
, sizeof(on
));
2200 sock_setsockopt(newso
, SOL_SOCKET
, SO_UPCALLCLOSEWAIT
, &on
, sizeof(on
));
2202 ncbsp
->ncbs_so
= newso
;
2204 ncbsp
->ncbs_stamp
= now
.tv_sec
;
2206 lck_mtx_lock(nfs_global_mutex
);
2208 /* add it to the list */
2209 TAILQ_INSERT_HEAD(&nfs4_cb_socks
, ncbsp
, ncbs_link
);
2211 /* verify it's from a host we have mounted */
2212 TAILQ_FOREACH(nmp
, &nfs4_cb_mounts
, nm_cblink
) {
2213 /* check if socket's source address matches this mount's server address */
2216 if (nfs_sockaddr_cmp((struct sockaddr
*)&ncbsp
->ncbs_saddr
, nmp
->nm_saddr
) == 0)
2219 if (!nmp
) /* we don't want this socket, mark it dead */
2220 ncbsp
->ncbs_flags
|= NCBSOCK_DEAD
;
2222 /* make sure the callback socket cleanup timer is running */
2223 /* (shorten the timer if we've got a socket we don't want) */
2224 if (!nfs4_callback_timer_on
) {
2225 nfs4_callback_timer_on
= 1;
2226 nfs_interval_timer_start(nfs4_callback_timer_call
,
2227 !nmp
? 500 : (NFS4_CB_TIMER_PERIOD
* 1000));
/* timer already armed: the value 2 records that a short interval was already requested */
2228 } else if (!nmp
&& (nfs4_callback_timer_on
< 2)) {
2229 nfs4_callback_timer_on
= 2;
2230 thread_call_cancel(nfs4_callback_timer_call
);
2231 nfs_interval_timer_start(nfs4_callback_timer_call
, 500);
2234 lck_mtx_unlock(nfs_global_mutex
);
2238 * Receive mbufs from callback sockets into RPC records and process each record.
2239 * Detect connection has been closed and shut down.
/*
 * nfs4_cb_rcv: socket upcall for an accepted callback connection.
 *
 * so:       the callback socket with data (or a state change) pending.
 * arg:      the struct nfs_callback_socket registered at accept time.
 * waitflag: unused.
 *
 * Visible sequence:
 *  - with nfs_global_mutex held, wait (msleep with a {1,0} timespec) while
 *    another upcall holds NCBSOCK_UPCALL, setting NCBSOCK_UPCALLWANT each
 *    time round; then claim NCBSOCK_UPCALL and unlock;
 *  - while no error occurs and data keeps arriving, read RPC records with
 *    nfs_rpc_record_read(MSG_DONTWAIT) and pass each complete record to
 *    nfs4_cb_handler();
 *  - on any error other than EWOULDBLOCK, or when nothing was received,
 *    zero ncbs_stamp and flag the entry NCBSOCK_DEAD so the cleanup timer
 *    reaps it; ncbs_stamp is also set from now.tv_sec (presumably in the
 *    alternative branch, which is not delimited in this listing);
 *  - retake the mutex and clear NCBSOCK_UPCALL.
 *
 * NOTE(review): as listed, NCBSOCK_DEAD is set before nfs_global_mutex is
 * retaken; confirm whether that flag update needs the mutex.  The
 * declarations of m and now and any wakeup of waiters are not visible.
 */
2242 nfs4_cb_rcv(socket_t so
, void *arg
, __unused
int waitflag
)
2244 struct nfs_callback_socket
*ncbsp
= arg
;
2245 struct timespec ts
= {1,0};
2248 int error
= 0, recv
= 1;
2250 lck_mtx_lock(nfs_global_mutex
);
2251 while (ncbsp
->ncbs_flags
& NCBSOCK_UPCALL
) {
2252 /* wait if upcall is already in progress */
2253 ncbsp
->ncbs_flags
|= NCBSOCK_UPCALLWANT
;
2254 msleep(ncbsp
, nfs_global_mutex
, PSOCK
, "cbupcall", &ts
);
/* claim the socket for this upcall */
2256 ncbsp
->ncbs_flags
|= NCBSOCK_UPCALL
;
2257 lck_mtx_unlock(nfs_global_mutex
);
2259 /* loop while we make error-free progress */
2260 while (!error
&& recv
) {
2261 error
= nfs_rpc_record_read(so
, &ncbsp
->ncbs_rrs
, MSG_DONTWAIT
, &recv
, &m
);
2262 if (m
) /* handle the request */
2263 error
= nfs4_cb_handler(ncbsp
, m
);
2266 /* note: no error and no data indicates server closed its end */
2267 if ((error
!= EWOULDBLOCK
) && (error
|| !recv
)) {
2269 * Socket is either being closed or should be.
2270 * We can't close the socket in the context of the upcall.
2271 * So we mark it as dead and leave it for the cleanup timer to reap.
2273 ncbsp
->ncbs_stamp
= 0;
2274 ncbsp
->ncbs_flags
|= NCBSOCK_DEAD
;
2277 ncbsp
->ncbs_stamp
= now
.tv_sec
;
2280 lck_mtx_lock(nfs_global_mutex
);
2281 ncbsp
->ncbs_flags
&= ~NCBSOCK_UPCALL
;
2282 lck_mtx_unlock(nfs_global_mutex
);
2287 * Handle an NFS callback channel request.
/*
 * nfs4_cb_handler: decode one NFSv4 callback RPC request, act on it, and
 * send the reply back on the same socket.
 *
 * ncbsp: callback socket the request arrived on; supplies the socket and
 *        the peer address used to validate the callback ID.
 * mreq:  mbuf chain holding one complete RPC record.
 *
 * Visible sequence:
 *  - dissect the RPC call header (xid, message type, RPC version, program,
 *    program version, procedure), asserting each against its expected value;
 *  - read and skip the credential and verifier, bounding each length by
 *    RPCAUTH_MAXSIZ;
 *  - NFSPROC4_CB_NULL sets status to NFSERR_RETVOID;
 *  - NFSPROC4_CB_COMPOUND copies the tag into the reply body (mrest), adds
 *    a result count placeholder, reads minorversion (must be 0), callback
 *    ID and op count, matches the callback ID and peer address against
 *    nfs4_cb_mounts under nfs_global_mutex, then processes each op:
 *      CB_GETATTR - looks the node up by file handle and returns only the
 *                   change, size and modify time attributes that were asked for;
 *      CB_RECALL  - looks the node up, checks the delegation stateid, and
 *                   queues the node for delegation return;
 *      CB_ILLEGAL - replies NFSERR_OP_ILLEGAL and stops;
 *  - patch the result count, drop the callback reference on the mount and
 *    wake anyone sleeping on nm_cbrefs;
 *  - build the RPC reply header in mhead, chain mrest behind it, compute
 *    the reply length, set the record mark with the top bit set, and send
 *    it with sock_sendmbuf(); a short send is turned into EWOULDBLOCK.
 *
 * NOTE(review): this listing omits many source lines (switch headers,
 * several if/else lines, break and goto statements, some declarations), so
 * the exact branch structure cannot be read from here.
 */
2290 nfs4_cb_handler(struct nfs_callback_socket
*ncbsp
, mbuf_t mreq
)
2292 socket_t so
= ncbsp
->ncbs_so
;
2293 struct nfsm_chain nmreq
, nmrep
;
2294 mbuf_t mhead
= NULL
, mrest
= NULL
, m
;
2296 struct nfsmount
*nmp
;
2299 nfs_stateid stateid
;
2300 uint32_t bitmap
[NFS_ATTR_BITMAP_LEN
], rbitmap
[NFS_ATTR_BITMAP_LEN
], bmlen
, truncate
, attrbytes
;
2301 uint32_t val
, xid
, procnum
, taglen
, cbid
, numops
, op
, status
;
2302 uint32_t auth_type
, auth_len
;
2303 uint32_t numres
, *pnumres
;
2304 int error
= 0, replen
, len
;
2307 xid
= numops
= op
= status
= procnum
= taglen
= cbid
= 0;
2309 nfsm_chain_dissect_init(error
, &nmreq
, mreq
);
2310 nfsm_chain_get_32(error
, &nmreq
, xid
); // RPC XID
2311 nfsm_chain_get_32(error
, &nmreq
, val
); // RPC Call
2312 nfsm_assert(error
, (val
== RPC_CALL
), EBADRPC
);
2313 nfsm_chain_get_32(error
, &nmreq
, val
); // RPC Version
2314 nfsm_assert(error
, (val
== RPC_VER2
), ERPCMISMATCH
);
2315 nfsm_chain_get_32(error
, &nmreq
, val
); // RPC Program Number
2316 nfsm_assert(error
, (val
== NFS4_CALLBACK_PROG
), EPROGUNAVAIL
);
2317 nfsm_chain_get_32(error
, &nmreq
, val
); // NFS Callback Program Version Number
2318 nfsm_assert(error
, (val
== NFS4_CALLBACK_PROG_VERSION
), EPROGMISMATCH
);
2319 nfsm_chain_get_32(error
, &nmreq
, procnum
); // NFS Callback Procedure Number
2320 nfsm_assert(error
, (procnum
<= NFSPROC4_CB_COMPOUND
), EPROCUNAVAIL
);
2322 /* Handle authentication */
2323 /* XXX just ignore auth for now - handling kerberos may be tricky */
2324 nfsm_chain_get_32(error
, &nmreq
, auth_type
); // RPC Auth Flavor
2325 nfsm_chain_get_32(error
, &nmreq
, auth_len
); // RPC Auth Length
2326 nfsm_assert(error
, (auth_len
<= RPCAUTH_MAXSIZ
), EBADRPC
);
2327 if (!error
&& (auth_len
> 0))
2328 nfsm_chain_adv(error
, &nmreq
, nfsm_rndup(auth_len
));
2329 nfsm_chain_adv(error
, &nmreq
, NFSX_UNSIGNED
); // verifier flavor (should be AUTH_NONE)
2330 nfsm_chain_get_32(error
, &nmreq
, auth_len
); // verifier length
2331 nfsm_assert(error
, (auth_len
<= RPCAUTH_MAXSIZ
), EBADRPC
);
2332 if (!error
&& (auth_len
> 0))
2333 nfsm_chain_adv(error
, &nmreq
, nfsm_rndup(auth_len
));
2341 case NFSPROC4_CB_NULL
:
2342 status
= NFSERR_RETVOID
;
2344 case NFSPROC4_CB_COMPOUND
:
2345 /* tag, minorversion, cb ident, numops, op array */
2346 nfsm_chain_get_32(error
, &nmreq
, taglen
); /* tag length */
/*
 * NOTE(review): the check below tests val, which still holds the callback
 * program version read earlier, while the value just read is taglen.
 * taglen then sizes the reply mbuf request and bounds the tag copy loop,
 * so it looks as if the intent was to bound taglen by NFS4_OPAQUE_LIMIT.
 * Confirm and fix in the real source.
 */
2347 nfsm_assert(error
, (val
<= NFS4_OPAQUE_LIMIT
), EBADRPC
);
2349 /* start building the body of the response */
2350 nfsm_mbuf_get(error
, &mrest
, nfsm_rndup(taglen
) + 5*NFSX_UNSIGNED
);
2351 nfsm_chain_init(&nmrep
, mrest
);
2353 /* copy tag from request to response */
2354 nfsm_chain_add_32(error
, &nmrep
, taglen
); /* tag length */
2355 for (len
= (int)taglen
; !error
&& (len
> 0); len
-= NFSX_UNSIGNED
) {
2356 nfsm_chain_get_32(error
, &nmreq
, val
);
2357 nfsm_chain_add_32(error
, &nmrep
, val
);
2360 /* insert number of results placeholder */
2362 nfsm_chain_add_32(error
, &nmrep
, numres
);
/* remember where the placeholder landed so it can be patched once the ops are done */
2363 pnumres
= (uint32_t*)(nmrep
.nmc_ptr
- NFSX_UNSIGNED
);
2365 nfsm_chain_get_32(error
, &nmreq
, val
); /* minorversion */
2366 nfsm_assert(error
, (val
== 0), NFSERR_MINOR_VERS_MISMATCH
);
2367 nfsm_chain_get_32(error
, &nmreq
, cbid
); /* callback ID */
2368 nfsm_chain_get_32(error
, &nmreq
, numops
); /* number of operations */
2370 if ((error
== EBADRPC
) || (error
== NFSERR_MINOR_VERS_MISMATCH
))
2372 else if ((error
== ENOBUFS
) || (error
== ENOMEM
))
2373 status
= NFSERR_RESOURCE
;
2375 status
= NFSERR_SERVERFAULT
;
2377 nfsm_chain_null(&nmrep
);
2380 /* match the callback ID to a registered mount */
2381 lck_mtx_lock(nfs_global_mutex
);
2382 TAILQ_FOREACH(nmp
, &nfs4_cb_mounts
, nm_cblink
) {
2383 if (nmp
->nm_cbid
!= cbid
)
2385 /* verify socket's source address matches this mount's server address */
2388 if (nfs_sockaddr_cmp((struct sockaddr
*)&ncbsp
->ncbs_saddr
, nmp
->nm_saddr
) == 0)
2391 /* mark the NFS mount as busy */
2394 lck_mtx_unlock(nfs_global_mutex
);
2396 /* if no mount match, just drop socket. */
2398 nfsm_chain_null(&nmrep
);
2402 /* process ops, adding results to mrest */
2403 while (numops
> 0) {
2405 nfsm_chain_get_32(error
, &nmreq
, op
);
2409 case NFS_OP_CB_GETATTR
:
2410 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
2412 nfsm_chain_get_fh(error
, &nmreq
, NFS_VER4
, &fh
);
2413 bmlen
= NFS_ATTR_BITMAP_LEN
;
2414 nfsm_chain_get_bitmap(error
, &nmreq
, bitmap
, bmlen
);
2418 numops
= 0; /* don't process any more ops */
2420 /* find the node for the file handle */
2421 error
= nfs_nget(nmp
->nm_mountp
, NULL
, NULL
, fh
.fh_data
, fh
.fh_len
, NULL
, NULL
, RPCAUTH_UNKNOWN
, NG_NOCREATE
, &np
);
2423 status
= NFSERR_BADHANDLE
;
2426 numops
= 0; /* don't process any more ops */
2429 nfsm_chain_add_32(error
, &nmrep
, op
);
2430 nfsm_chain_add_32(error
, &nmrep
, status
);
2431 if (!error
&& (status
== EBADRPC
))
2434 /* only allow returning size, change, and mtime attrs */
2435 NFS_CLEAR_ATTRIBUTES(&rbitmap
);
2437 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_CHANGE
)) {
2438 NFS_BITMAP_SET(&rbitmap
, NFS_FATTR_CHANGE
);
2439 attrbytes
+= 2 * NFSX_UNSIGNED
;
2441 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_SIZE
)) {
2442 NFS_BITMAP_SET(&rbitmap
, NFS_FATTR_SIZE
);
2443 attrbytes
+= 2 * NFSX_UNSIGNED
;
2445 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_TIME_MODIFY
)) {
2446 NFS_BITMAP_SET(&rbitmap
, NFS_FATTR_TIME_MODIFY
);
2447 attrbytes
+= 3 * NFSX_UNSIGNED
;
2449 nfsm_chain_add_bitmap(error
, &nmrep
, rbitmap
, NFS_ATTR_BITMAP_LEN
);
2450 nfsm_chain_add_32(error
, &nmrep
, attrbytes
);
2451 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_CHANGE
))
2452 nfsm_chain_add_64(error
, &nmrep
,
2453 np
->n_vattr
.nva_change
+ ((np
->n_flag
& NMODIFIED
) ? 1 : 0));
2454 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_SIZE
))
2455 nfsm_chain_add_64(error
, &nmrep
, np
->n_size
);
2456 if (NFS_BITMAP_ISSET(&bitmap
, NFS_FATTR_TIME_MODIFY
)) {
2457 nfsm_chain_add_64(error
, &nmrep
, np
->n_vattr
.nva_timesec
[NFSTIME_MODIFY
]);
2458 nfsm_chain_add_32(error
, &nmrep
, np
->n_vattr
.nva_timensec
[NFSTIME_MODIFY
]);
2460 nfs_node_unlock(np
);
2461 vnode_put(NFSTOV(np
));
2465 * If we hit an error building the reply, we can't easily back up.
2466 * So we'll just update the status and hope the server ignores the
2470 case NFS_OP_CB_RECALL
:
2471 // (STATEID, TRUNCATE, FH) -> (STATUS)
2473 nfsm_chain_get_stateid(error
, &nmreq
, &stateid
);
2474 nfsm_chain_get_32(error
, &nmreq
, truncate
);
2475 nfsm_chain_get_fh(error
, &nmreq
, NFS_VER4
, &fh
);
2479 numops
= 0; /* don't process any more ops */
2481 /* find the node for the file handle */
2482 error
= nfs_nget(nmp
->nm_mountp
, NULL
, NULL
, fh
.fh_data
, fh
.fh_len
, NULL
, NULL
, RPCAUTH_UNKNOWN
, NG_NOCREATE
, &np
);
2484 status
= NFSERR_BADHANDLE
;
2487 numops
= 0; /* don't process any more ops */
2488 } else if (!(np
->n_openflags
& N_DELEG_MASK
) ||
2489 bcmp(&np
->n_dstateid
, &stateid
, sizeof(stateid
))) {
2490 /* delegation stateid state doesn't match */
2491 status
= NFSERR_BAD_STATEID
;
2492 numops
= 0; /* don't process any more ops */
2494 if (!status
) /* add node to recall queue, and wake socket thread */
2495 nfs4_delegation_return_enqueue(np
);
2497 nfs_node_unlock(np
);
2498 vnode_put(NFSTOV(np
));
2501 nfsm_chain_add_32(error
, &nmrep
, op
);
2502 nfsm_chain_add_32(error
, &nmrep
, status
);
2503 if (!error
&& (status
== EBADRPC
))
2506 case NFS_OP_CB_ILLEGAL
:
2508 nfsm_chain_add_32(error
, &nmrep
, NFS_OP_CB_ILLEGAL
);
2509 status
= NFSERR_OP_ILLEGAL
;
2510 nfsm_chain_add_32(error
, &nmrep
, status
);
2511 numops
= 0; /* don't process any more ops */
2517 if (!status
&& error
) {
2518 if (error
== EBADRPC
)
2520 else if ((error
== ENOBUFS
) || (error
== ENOMEM
))
2521 status
= NFSERR_RESOURCE
;
2523 status
= NFSERR_SERVERFAULT
;
2527 /* Now, set the numres field */
2528 *pnumres
= txdr_unsigned(numres
);
2529 nfsm_chain_build_done(error
, &nmrep
);
2530 nfsm_chain_null(&nmrep
);
2532 /* drop the callback reference on the mount */
2533 lck_mtx_lock(nfs_global_mutex
);
2536 wakeup(&nmp
->nm_cbrefs
);
2537 lck_mtx_unlock(nfs_global_mutex
);
2542 if (status
== EBADRPC
)
2543 OSAddAtomic64(1, &nfsstats
.rpcinvalid
);
2545 /* build reply header */
2546 error
= mbuf_gethdr(MBUF_WAITOK
, MBUF_TYPE_DATA
, &mhead
);
2547 nfsm_chain_init(&nmrep
, mhead
);
2548 nfsm_chain_add_32(error
, &nmrep
, 0); /* insert space for an RPC record mark */
2549 nfsm_chain_add_32(error
, &nmrep
, xid
);
2550 nfsm_chain_add_32(error
, &nmrep
, RPC_REPLY
);
2551 if ((status
== ERPCMISMATCH
) || (status
& NFSERR_AUTHERR
)) {
2552 nfsm_chain_add_32(error
, &nmrep
, RPC_MSGDENIED
);
2553 if (status
& NFSERR_AUTHERR
) {
2554 nfsm_chain_add_32(error
, &nmrep
, RPC_AUTHERR
);
2555 nfsm_chain_add_32(error
, &nmrep
, (status
& ~NFSERR_AUTHERR
));
2557 nfsm_chain_add_32(error
, &nmrep
, RPC_MISMATCH
);
2558 nfsm_chain_add_32(error
, &nmrep
, RPC_VER2
);
2559 nfsm_chain_add_32(error
, &nmrep
, RPC_VER2
);
2563 nfsm_chain_add_32(error
, &nmrep
, RPC_MSGACCEPTED
);
2564 /* XXX RPCAUTH_NULL verifier */
2565 nfsm_chain_add_32(error
, &nmrep
, RPCAUTH_NULL
);
2566 nfsm_chain_add_32(error
, &nmrep
, 0);
2567 /* accepted status */
2570 nfsm_chain_add_32(error
, &nmrep
, RPC_PROGUNAVAIL
);
2573 nfsm_chain_add_32(error
, &nmrep
, RPC_PROGMISMATCH
);
2574 nfsm_chain_add_32(error
, &nmrep
, NFS4_CALLBACK_PROG_VERSION
);
2575 nfsm_chain_add_32(error
, &nmrep
, NFS4_CALLBACK_PROG_VERSION
);
2578 nfsm_chain_add_32(error
, &nmrep
, RPC_PROCUNAVAIL
);
2581 nfsm_chain_add_32(error
, &nmrep
, RPC_GARBAGE
);
2584 nfsm_chain_add_32(error
, &nmrep
, RPC_SUCCESS
);
2585 if (status
!= NFSERR_RETVOID
)
2586 nfsm_chain_add_32(error
, &nmrep
, status
);
2590 nfsm_chain_build_done(error
, &nmrep
);
2592 nfsm_chain_null(&nmrep
);
2595 error
= mbuf_setnext(nmrep
.nmc_mcur
, mrest
);
2597 printf("nfs cb: mbuf_setnext failed %d\n", error
);
2601 /* Calculate the size of the reply */
2603 for (m
= nmrep
.nmc_mhead
; m
; m
= mbuf_next(m
))
2604 replen
+= mbuf_len(m
);
2605 mbuf_pkthdr_setlen(mhead
, replen
);
2606 error
= mbuf_pkthdr_setrcvif(mhead
, NULL
);
2607 nfsm_chain_set_recmark(error
, &nmrep
, (replen
- NFSX_UNSIGNED
) | 0x80000000);
2608 nfsm_chain_null(&nmrep
);
2610 /* send the reply */
2611 bzero(&msg
, sizeof(msg
));
2612 error
= sock_sendmbuf(so
, &msg
, mhead
, 0, &sentlen
);
2614 if (!error
&& ((int)sentlen
!= replen
))
2615 error
= EWOULDBLOCK
;
2616 if (error
== EWOULDBLOCK
) /* inability to send response is considered fatal */
2620 nfsm_chain_cleanup(&nmrep
);
2632 * Initialize an nfs_rpc_record_state structure.
/*
 * nfs_rpc_record_state_init: reset an RPC record parsing state.
 *
 * nrrsp: the state to initialise.
 *
 * Zeroes the whole structure, then sets nrrs_markerleft to the size of the
 * nrrs_fragleft field, i.e. a full record marker is still to be read.
 */
2635 nfs_rpc_record_state_init(struct nfs_rpc_record_state
*nrrsp
)
2637 bzero(nrrsp
, sizeof(*nrrsp
));
2638 nrrsp
->nrrs_markerleft
= sizeof(nrrsp
->nrrs_fragleft
);
2642 * Clean up an nfs_rpc_record_state structure.
/*
 * nfs_rpc_record_state_cleanup: release any partially assembled record.
 *
 * nrrsp: the state to clean up.
 *
 * If an mbuf chain is attached (nrrs_m), frees it with mbuf_freem() and
 * clears both nrrs_m and nrrs_mlast.  The other fields are left untouched.
 */
2645 nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state
*nrrsp
)
2647 if (nrrsp
->nrrs_m
) {
2648 mbuf_freem(nrrsp
->nrrs_m
);
2649 nrrsp
->nrrs_m
= nrrsp
->nrrs_mlast
= NULL
;
2654 * Read the next (marked) RPC record from the socket.
2656 * *recvp returns if any data was received.
2657 * *mp returns the next complete RPC record
/*
 * nfs_rpc_record_read: pull data for the next record-marked RPC message
 * off a stream socket, resuming from the saved parsing state.
 *
 * so:    socket to read from.
 * nrrsp: parsing state carried between calls (marker bytes left, fragment
 *        bytes left, accumulated record length, mbuf chain so far).
 * flags: passed through to sock_receive() and sock_receivembuf().
 * recvp: per the description preceding this function, reports whether any
 *        data was received (the stores to it are not in this listing).
 * mp:    receives the mbuf chain of a completed record.
 *
 * Two phases are visible:
 *  1. while marker bytes are outstanding, read them directly into
 *     nrrs_fragleft; once complete, convert from network order, treat the
 *     top bit as the last-fragment flag, add the fragment length to
 *     nrrs_reclen, and log if that exceeds NFS_MAXPACKET;
 *  2. while fragment bytes are outstanding, receive mbufs and append them
 *     to the chain, tracking the last mbuf in nrrs_mlast.
 * When a fragment is finished the marker count is re-armed; if it was the
 * last fragment, the chain is handed out through mp and the record state
 * is reset.
 *
 * NOTE(review): local declarations, the break statements, the error value
 * chosen for an oversized record, and the return statement are not visible
 * in this listing.
 */
2660 nfs_rpc_record_read(socket_t so
, struct nfs_rpc_record_state
*nrrsp
, int flags
, int *recvp
, mbuf_t
*mp
)
2671 /* read the TCP RPC record marker */
2672 while (!error
&& nrrsp
->nrrs_markerleft
) {
/* aim the read at the marker bytes that have not arrived yet */
2673 aio
.iov_base
= ((char*)&nrrsp
->nrrs_fragleft
+
2674 sizeof(nrrsp
->nrrs_fragleft
) - nrrsp
->nrrs_markerleft
);
2675 aio
.iov_len
= nrrsp
->nrrs_markerleft
;
2676 bzero(&msg
, sizeof(msg
));
2679 error
= sock_receive(so
, &msg
, flags
, &rcvlen
);
2680 if (error
|| !rcvlen
)
2683 nrrsp
->nrrs_markerleft
-= rcvlen
;
2684 if (nrrsp
->nrrs_markerleft
)
2686 /* record marker complete */
2687 nrrsp
->nrrs_fragleft
= ntohl(nrrsp
->nrrs_fragleft
);
/* top bit flags the final fragment; the remaining bits are the fragment length */
2688 if (nrrsp
->nrrs_fragleft
& 0x80000000) {
2689 nrrsp
->nrrs_lastfrag
= 1;
2690 nrrsp
->nrrs_fragleft
&= ~0x80000000;
2692 nrrsp
->nrrs_reclen
+= nrrsp
->nrrs_fragleft
;
2693 if (nrrsp
->nrrs_reclen
> NFS_MAXPACKET
) {
2694 /* This is SERIOUS! We are out of sync with the sender. */
2695 log(LOG_ERR
, "impossible RPC record length (%d) on callback", nrrsp
->nrrs_reclen
);
2700 /* read the TCP RPC record fragment */
2701 while (!error
&& !nrrsp
->nrrs_markerleft
&& nrrsp
->nrrs_fragleft
) {
2703 rcvlen
= nrrsp
->nrrs_fragleft
;
2704 error
= sock_receivembuf(so
, NULL
, &m
, flags
, &rcvlen
);
2705 if (error
|| !rcvlen
|| !m
)
2708 /* append mbufs to list */
2709 nrrsp
->nrrs_fragleft
-= rcvlen
;
2710 if (!nrrsp
->nrrs_m
) {
2713 error
= mbuf_setnext(nrrsp
->nrrs_mlast
, m
);
2715 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error
);
2720 while (mbuf_next(m
))
2722 nrrsp
->nrrs_mlast
= m
;
2725 /* done reading fragment? */
2726 if (!error
&& !nrrsp
->nrrs_markerleft
&& !nrrsp
->nrrs_fragleft
) {
2727 /* reset socket fragment parsing state */
2728 nrrsp
->nrrs_markerleft
= sizeof(nrrsp
->nrrs_fragleft
);
2729 if (nrrsp
->nrrs_lastfrag
) {
2730 /* RPC record complete */
2731 *mp
= nrrsp
->nrrs_m
;
2732 /* reset socket record parsing state */
2733 nrrsp
->nrrs_reclen
= 0;
2734 nrrsp
->nrrs_m
= nrrsp
->nrrs_mlast
= NULL
;
2735 nrrsp
->nrrs_lastfrag
= 0;
2745 * The NFS client send routine.
2747 * Send the given NFS request out the mount's socket.
2748 * Holds nfs_sndlock() for the duration of this call.
2750 * - check for request termination (sigintr)
2751 * - wait for reconnect, if necessary
2752 * - UDP: check the congestion window
2753 * - make a copy of the request to send
2754 * - UDP: update the congestion window
2755 * - send the request
2757 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
2758 * rexmit count is also updated if this isn't the first send.
2760 * If the send is not successful, make sure R_MUSTRESEND is set.
2761 * If this wasn't the first transmit, set R_RESENDERR.
2762 * Also, undo any UDP congestion window changes made.
2764 * If the error appears to indicate that the socket should
2765 * be reconnected, mark the socket for reconnection.
2767 * Only return errors when the request should be aborted.
/*
 * nfs_send: transmit one NFS request on the mount socket.
 *
 * req:  the request to send; its r_flags, r_error and r_rexmit are updated
 *       under req->r_mtx.
 * wait: selects MBUF_WAITOK or MBUF_DONTWAIT for the mbuf copy.
 *
 * Visible sequence:
 *  - take the send lock (nfs_sndlock) and check for termination
 *    (nfs_sigintr); the listed failure paths store the error in r_error
 *    and clear R_SENDING;
 *  - a setup request outside NMSOCK_SETUP, or a recovery request without
 *    NMSOCK_READY, is treated as needing a reconnect;
 *  - if the socket is not ready, wake the socket thread and sleep in
 *    msleep() with a { 2, 0 } timespec until NMSOCK_READY is set; a soft
 *    mount that has been reconnecting for 8 seconds or more is terminated
 *    with ETIMEDOUT and R_SOFTTERM;
 *  - mark the mount NFSSTA_SENDING;
 *  - for SOCK_DGRAM, wait on the congestion window queue while nm_sent is
 *    at or above nm_cwnd, then account for this send (first transmit adds
 *    NFS_CWNDSCALE to nm_sent; a retransmit turns timing off and keeps
 *    nm_cwnd at NFS_CWNDSCALE or above);
 *  - copy the request mbuf chain and send the copy with sock_sendmbuf(); a
 *    short send is turned into EWOULDBLOCK, and on a stream socket a
 *    partial send requests a reconnect;
 *  - on success update the retransmit count and flags (R_SENT set,
 *    R_RESENDERR cleared); on failure set R_MUSTRESEND, and R_RESENDERR
 *    for a retransmit;
 *  - decide whether the socket needs reconnecting, log errors other than
 *    EINTR and ERESTART, and restrict what is returned to EINTR, ERESTART,
 *    EIO, ENXIO and ETIMEDOUT.
 *
 * NOTE(review): this listing omits many source lines (most if/else lines,
 * returns, gotos, the switch on the error value, several declarations), so
 * the exact branch structure cannot be read from here.
 */
2770 nfs_send(struct nfsreq
*req
, int wait
)
2772 struct nfsmount
*nmp
;
2773 struct nfs_socket
*nso
;
2774 int error
, error2
, sotype
, rexmit
, slpflag
= 0, needrecon
;
2776 struct sockaddr
*sendnam
;
2779 struct timespec ts
= { 2, 0 };
2782 error
= nfs_sndlock(req
);
2784 lck_mtx_lock(&req
->r_mtx
);
2785 req
->r_error
= error
;
2786 req
->r_flags
&= ~R_SENDING
;
2787 lck_mtx_unlock(&req
->r_mtx
);
2791 error
= nfs_sigintr(req
->r_nmp
, req
, NULL
, 0);
2794 lck_mtx_lock(&req
->r_mtx
);
2795 req
->r_error
= error
;
2796 req
->r_flags
&= ~R_SENDING
;
2797 lck_mtx_unlock(&req
->r_mtx
);
2801 sotype
= nmp
->nm_sotype
;
2804 * If it's a setup RPC but we're not in SETUP... must need reconnect.
2805 * If it's a recovery RPC but the socket's not ready... must need reconnect.
2807 if (((req
->r_flags
& R_SETUP
) && !(nmp
->nm_sockflags
& NMSOCK_SETUP
)) ||
2808 ((req
->r_flags
& R_RECOVER
) && !(nmp
->nm_sockflags
& NMSOCK_READY
))) {
2811 lck_mtx_lock(&req
->r_mtx
);
2812 req
->r_error
= error
;
2813 req
->r_flags
&= ~R_SENDING
;
2814 lck_mtx_unlock(&req
->r_mtx
);
2818 /* If the socket needs reconnection, do that now. */
2819 /* wait until socket is ready - unless this request is part of setup */
2820 lck_mtx_lock(&nmp
->nm_lock
);
2821 if (!(nmp
->nm_sockflags
& NMSOCK_READY
) &&
2822 !((nmp
->nm_sockflags
& NMSOCK_SETUP
) && (req
->r_flags
& R_SETUP
))) {
2823 if (NMFLAG(nmp
, INTR
) && !(req
->r_flags
& R_NOINTR
))
2825 lck_mtx_unlock(&nmp
->nm_lock
);
2828 lck_mtx_lock(&req
->r_mtx
);
2829 req
->r_flags
&= ~R_SENDING
;
2830 req
->r_flags
|= R_MUSTRESEND
;
2832 lck_mtx_unlock(&req
->r_mtx
);
2835 NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req
->r_xid
));
2836 lck_mtx_lock(&req
->r_mtx
);
2837 req
->r_flags
&= ~R_MUSTRESEND
;
2839 lck_mtx_unlock(&req
->r_mtx
);
2840 lck_mtx_lock(&nmp
->nm_lock
);
2841 while (!(nmp
->nm_sockflags
& NMSOCK_READY
)) {
2842 /* don't bother waiting if the socket thread won't be reconnecting it */
2843 if (nmp
->nm_state
& NFSSTA_FORCE
) {
2847 if (NMFLAG(nmp
, SOFT
) && (nmp
->nm_reconnect_start
> 0)) {
2850 if ((now
.tv_sec
- nmp
->nm_reconnect_start
) >= 8) {
2851 /* soft mount in reconnect for a while... terminate ASAP */
2852 OSAddAtomic64(1, &nfsstats
.rpctimeouts
);
2853 req
->r_flags
|= R_SOFTTERM
;
2854 req
->r_error
= error
= ETIMEDOUT
;
2858 /* make sure socket thread is running, then wait */
2859 nfs_mount_sock_thread_wake(nmp
);
2860 if ((error
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 1)))
2862 msleep(req
, &nmp
->nm_lock
, slpflag
|PSOCK
, "nfsconnectwait", &ts
);
2865 lck_mtx_unlock(&nmp
->nm_lock
);
2867 lck_mtx_lock(&req
->r_mtx
);
2868 req
->r_error
= error
;
2869 req
->r_flags
&= ~R_SENDING
;
2870 lck_mtx_unlock(&req
->r_mtx
);
2876 /* note that we're using the mount's socket to do the send */
2877 nmp
->nm_state
|= NFSSTA_SENDING
; /* will be cleared by nfs_sndunlock() */
2878 lck_mtx_unlock(&nmp
->nm_lock
);
2881 lck_mtx_lock(&req
->r_mtx
);
2882 req
->r_flags
&= ~R_SENDING
;
2883 req
->r_flags
|= R_MUSTRESEND
;
2885 lck_mtx_unlock(&req
->r_mtx
);
2889 lck_mtx_lock(&req
->r_mtx
);
/* non-zero when this request has been sent before, i.e. this is a retransmit */
2890 rexmit
= (req
->r_flags
& R_SENT
);
2892 if (sotype
== SOCK_DGRAM
) {
2893 lck_mtx_lock(&nmp
->nm_lock
);
2894 if (!(req
->r_flags
& R_CWND
) && (nmp
->nm_sent
>= nmp
->nm_cwnd
)) {
2895 /* if we can't send this out yet, wait on the cwnd queue */
2896 slpflag
= (NMFLAG(nmp
, INTR
) && req
->r_thread
) ? PCATCH
: 0;
2897 lck_mtx_unlock(&nmp
->nm_lock
);
2899 req
->r_flags
&= ~R_SENDING
;
2900 req
->r_flags
|= R_MUSTRESEND
;
2901 lck_mtx_unlock(&req
->r_mtx
);
2906 lck_mtx_lock(&nmp
->nm_lock
);
2907 while (nmp
->nm_sent
>= nmp
->nm_cwnd
) {
2908 if ((error
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 1)))
2910 TAILQ_INSERT_TAIL(&nmp
->nm_cwndq
, req
, r_cchain
);
2911 msleep(req
, &nmp
->nm_lock
, slpflag
| (PZERO
- 1), "nfswaitcwnd", &ts
);
2913 if ((req
->r_cchain
.tqe_next
!= NFSREQNOLIST
)) {
2914 TAILQ_REMOVE(&nmp
->nm_cwndq
, req
, r_cchain
);
2915 req
->r_cchain
.tqe_next
= NFSREQNOLIST
;
2918 lck_mtx_unlock(&nmp
->nm_lock
);
2922 * We update these *before* the send to avoid racing
2923 * against others who may be looking to send requests.
2926 /* first transmit */
2927 req
->r_flags
|= R_CWND
;
2928 nmp
->nm_sent
+= NFS_CWNDSCALE
;
2931 * When retransmitting, turn timing off
2932 * and divide congestion window by 2.
2934 req
->r_flags
&= ~R_TIMING
;
2936 if (nmp
->nm_cwnd
< NFS_CWNDSCALE
)
2937 nmp
->nm_cwnd
= NFS_CWNDSCALE
;
2939 lck_mtx_unlock(&nmp
->nm_lock
);
2942 req
->r_flags
&= ~R_MUSTRESEND
;
2943 lck_mtx_unlock(&req
->r_mtx
);
2945 error
= mbuf_copym(req
->r_mhead
, 0, MBUF_COPYALL
,
2946 wait
? MBUF_WAITOK
: MBUF_DONTWAIT
, &mreqcopy
);
2949 log(LOG_INFO
, "nfs_send: mbuf copy failed %d\n", error
);
2951 lck_mtx_lock(&req
->r_mtx
);
2952 req
->r_flags
&= ~R_SENDING
;
2953 req
->r_flags
|= R_MUSTRESEND
;
2955 lck_mtx_unlock(&req
->r_mtx
);
2959 bzero(&msg
, sizeof(msg
));
2960 if ((sotype
!= SOCK_STREAM
) && !sock_isconnected(nso
->nso_so
) && ((sendnam
= nmp
->nm_saddr
))) {
2961 msg
.msg_name
= (caddr_t
)sendnam
;
2962 msg
.msg_namelen
= sendnam
->sa_len
;
2964 error
= sock_sendmbuf(nso
->nso_so
, &msg
, mreqcopy
, 0, &sentlen
);
2965 #ifdef NFS_SOCKET_DEBUGGING
2966 if (error
|| (sentlen
!= req
->r_mreqlen
))
2967 NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
2968 req
->r_xid
, (int)sentlen
, (int)req
->r_mreqlen
, error
));
2970 if (!error
&& (sentlen
!= req
->r_mreqlen
))
2971 error
= EWOULDBLOCK
;
2972 needrecon
= ((sotype
== SOCK_STREAM
) && sentlen
&& (sentlen
!= req
->r_mreqlen
));
2974 lck_mtx_lock(&req
->r_mtx
);
2975 req
->r_flags
&= ~R_SENDING
;
2977 if (rexmit
&& (++req
->r_rexmit
> NFS_MAXREXMIT
))
2978 req
->r_rexmit
= NFS_MAXREXMIT
;
2982 req
->r_flags
&= ~R_RESENDERR
;
2984 OSAddAtomic64(1, &nfsstats
.rpcretries
);
2985 req
->r_flags
|= R_SENT
;
2986 if (req
->r_flags
& R_WAITSENT
) {
2987 req
->r_flags
&= ~R_WAITSENT
;
2991 lck_mtx_unlock(&req
->r_mtx
);
2996 req
->r_flags
|= R_MUSTRESEND
;
2998 req
->r_flags
|= R_RESENDERR
;
2999 if ((error
== EINTR
) || (error
== ERESTART
))
3000 req
->r_error
= error
;
3001 lck_mtx_unlock(&req
->r_mtx
);
3003 if (sotype
== SOCK_DGRAM
) {
3005 * Note: even though a first send may fail, we consider
3006 * the request sent for congestion window purposes.
3007 * So we don't need to undo any of the changes made above.
3010 * Socket errors ignored for connectionless sockets??
3011 * For now, ignore them all
3013 if ((error
!= EINTR
) && (error
!= ERESTART
) &&
3014 (error
!= EWOULDBLOCK
) && (error
!= EIO
) && (nso
== nmp
->nm_nso
)) {
3015 int clearerror
= 0, optlen
= sizeof(clearerror
);
3016 sock_getsockopt(nso
->nso_so
, SOL_SOCKET
, SO_ERROR
, &clearerror
, &optlen
);
3017 #ifdef NFS_SOCKET_DEBUGGING
3019 NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
3020 error
, clearerror
));
3025 /* check if it appears we should reconnect the socket */
3028 /* if send timed out, reconnect if on TCP */
3029 if (sotype
!= SOCK_STREAM
)
3046 if (needrecon
&& (nso
== nmp
->nm_nso
)) { /* mark socket as needing reconnect */
3047 NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req
->r_xid
, error
));
3048 nfs_need_reconnect(nmp
);
3054 * Don't log some errors:
3055 * EPIPE errors may be common with servers that drop idle connections.
3056 * EADDRNOTAVAIL may occur on network transitions.
3057 * ENOTCONN may occur under some network conditions.
3059 if ((error
== EPIPE
) || (error
== EADDRNOTAVAIL
) || (error
== ENOTCONN
))
3061 if (error
&& (error
!= EINTR
) && (error
!= ERESTART
))
3062 log(LOG_INFO
, "nfs send error %d for server %s\n", error
,
3063 !req
->r_nmp
? "<unmounted>" :
3064 vfs_statfs(req
->r_nmp
->nm_mountp
)->f_mntfromname
);
3066 if (nfs_is_dead(error
, nmp
))
3069 /* prefer request termination error over other errors */
3070 error2
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 0);
3074 /* only allow the following errors to be returned */
3075 if ((error
!= EINTR
) && (error
!= ERESTART
) && (error
!= EIO
) &&
3076 (error
!= ENXIO
) && (error
!= ETIMEDOUT
))
3082 * NFS client socket upcalls
3084 * Pull RPC replies out of an NFS mount's socket and match them
3085 * up with the pending request.
3087 * The datagram code is simple because we always get whole
3088 * messages out of the socket.
3090 * The stream code is more involved because we have to parse
3091 * the RPC records out of the stream.
3094 /* NFS client UDP socket upcall */
/*
 * nfs_udp_rcv: socket upcall for a datagram mount socket.
 *
 * so:       the socket the upcall fired on.
 * arg:      the struct nfsmount that owns the socket.
 * waitflag: unused.
 *
 * Checks NMSOCK_CONNECTING and that so is still the socket recorded in
 * nmp->nm_nso (the statements those tests guard are not in this listing),
 * then repeatedly receives with sock_receivembuf(MSG_DONTWAIT) and hands
 * what it gets to nfs_request_match_reply(), looping while an mbuf came
 * back and no error occurred.  Any error other than EWOULDBLOCK marks the
 * mount as needing a reconnect.
 *
 * NOTE(review): declarations of m, rcvlen and error, the do keyword, and
 * any guard in front of nfs_request_match_reply() are not visible here.
 */
3096 nfs_udp_rcv(socket_t so
, void *arg
, __unused
int waitflag
)
3098 struct nfsmount
*nmp
= arg
;
3099 struct nfs_socket
*nso
= nmp
->nm_nso
;
3104 if (nmp
->nm_sockflags
& NMSOCK_CONNECTING
)
3108 /* make sure we're on the current socket */
3109 if (!nso
|| (nso
->nso_so
!= so
))
3114 error
= sock_receivembuf(so
, NULL
, &m
, MSG_DONTWAIT
, &rcvlen
);
3116 nfs_request_match_reply(nmp
, m
);
3117 } while (m
&& !error
);
3119 if (error
&& (error
!= EWOULDBLOCK
)) {
3120 /* problems with the socket... mark for reconnection */
3121 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error
));
3122 nfs_need_reconnect(nmp
);
3126 /* NFS client TCP socket upcall */
/*
 * nfs_tcp_rcv: socket upcall for a stream mount socket.
 *
 * so:       the socket the upcall fired on.
 * arg:      the struct nfsmount that owns the socket.
 * waitflag: unused.
 *
 * Visible sequence:
 *  - check NMSOCK_CONNECTING, then under nm_lock confirm that so is still
 *    the current socket and that the mount is not NMSOCK_DISCONNECTING;
 *  - under nso_lock, test for an upcall already in progress or a
 *    disconnecting or dead socket (the action taken is not in this
 *    listing, presumably an early return); otherwise set NSO_UPCALL and
 *    take a local copy of the socket record parsing state;
 *  - with no lock held, read records with nfs_rpc_record_read(MSG_DONTWAIT)
 *    while progress is made, matching each complete reply to its request;
 *  - under nm_lock, if the mount still uses the same socket, write the
 *    parsing state back and clear NSO_UPCALL under nso_lock, and wake
 *    anyone sleeping on nm_sockflags when the mount is disconnecting;
 *  - any error other than EWOULDBLOCK, or receiving nothing, marks the
 *    mount as needing a reconnect.
 *
 * NOTE(review): declarations of m, error and recv, the early returns and
 * the else line before the final nm_lock unlock are not visible here.
 */
3128 nfs_tcp_rcv(socket_t so
, void *arg
, __unused
int waitflag
)
3130 struct nfsmount
*nmp
= arg
;
3131 struct nfs_socket
*nso
= nmp
->nm_nso
;
3132 struct nfs_rpc_record_state nrrs
;
3137 if (nmp
->nm_sockflags
& NMSOCK_CONNECTING
)
3140 /* make sure we're on the current socket */
3141 lck_mtx_lock(&nmp
->nm_lock
);
3143 if (!nso
|| (nso
->nso_so
!= so
) || (nmp
->nm_sockflags
& (NMSOCK_DISCONNECTING
))) {
3144 lck_mtx_unlock(&nmp
->nm_lock
);
3147 lck_mtx_unlock(&nmp
->nm_lock
);
3149 /* make sure this upcall should be trying to do work */
3150 lck_mtx_lock(&nso
->nso_lock
);
3151 if (nso
->nso_flags
& (NSO_UPCALL
|NSO_DISCONNECTING
|NSO_DEAD
)) {
3152 lck_mtx_unlock(&nso
->nso_lock
);
3155 nso
->nso_flags
|= NSO_UPCALL
;
/* work on a private copy of the parsing state while the lock is dropped */
3156 nrrs
= nso
->nso_rrs
;
3157 lck_mtx_unlock(&nso
->nso_lock
);
3159 /* loop while we make error-free progress */
3160 while (!error
&& recv
) {
3161 error
= nfs_rpc_record_read(so
, &nrrs
, MSG_DONTWAIT
, &recv
, &m
);
3162 if (m
) /* match completed response with request */
3163 nfs_request_match_reply(nmp
, m
);
3166 lck_mtx_lock(&nmp
->nm_lock
);
3167 if (nmp
->nm_nso
== nso
) {
3168 /* still the same socket, so update socket's RPC parsing state */
3169 lck_mtx_unlock(&nmp
->nm_lock
);
3170 lck_mtx_lock(&nso
->nso_lock
);
3171 nso
->nso_rrs
= nrrs
;
3172 nso
->nso_flags
&= ~NSO_UPCALL
;
3173 lck_mtx_unlock(&nso
->nso_lock
);
3174 if (nmp
->nm_sockflags
& NMSOCK_DISCONNECTING
)
3175 wakeup(&nmp
->nm_sockflags
);
3177 lck_mtx_unlock(&nmp
->nm_lock
);
3179 #ifdef NFS_SOCKET_DEBUGGING
3180 if (!recv
&& (error
!= EWOULDBLOCK
))
3181 NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error
));
3183 /* note: no error and no data indicates server closed its end */
3184 if ((error
!= EWOULDBLOCK
) && (error
|| !recv
)) {
3185 /* problems with the socket... mark for reconnection */
3186 NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error
));
3187 nfs_need_reconnect(nmp
);
3192 * "poke" a socket to try to provoke any pending errors
3195 nfs_sock_poke(struct nfsmount
*nmp
)
3203 lck_mtx_lock(&nmp
->nm_lock
);
3204 if ((nmp
->nm_sockflags
& NMSOCK_UNMOUNT
) ||
3205 !(nmp
->nm_sockflags
& NMSOCK_READY
) || !nmp
->nm_nso
|| !nmp
->nm_nso
->nso_so
) {
3206 lck_mtx_unlock(&nmp
->nm_lock
);
3209 lck_mtx_unlock(&nmp
->nm_lock
);
3210 aio
.iov_base
= &dummy
;
3213 bzero(&msg
, sizeof(msg
));
3216 error
= sock_send(nmp
->nm_nso
->nso_so
, &msg
, MSG_DONTWAIT
, &len
);
3217 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error
));
3218 nfs_is_dead(error
, nmp
);
3222 * Match an RPC reply with the corresponding request
3225 nfs_request_match_reply(struct nfsmount
*nmp
, mbuf_t mrep
)
3228 struct nfsm_chain nmrep
;
3229 u_int32_t reply
= 0, rxid
= 0;
3230 int error
= 0, asyncioq
, t1
;
3232 /* Get the xid and check that it is an rpc reply */
3233 nfsm_chain_dissect_init(error
, &nmrep
, mrep
);
3234 nfsm_chain_get_32(error
, &nmrep
, rxid
);
3235 nfsm_chain_get_32(error
, &nmrep
, reply
);
3236 if (error
|| (reply
!= RPC_REPLY
)) {
3237 OSAddAtomic64(1, &nfsstats
.rpcinvalid
);
3243 * Loop through the request list to match up the reply
3244 * Iff no match, just drop it.
3246 lck_mtx_lock(nfs_request_mutex
);
3247 TAILQ_FOREACH(req
, &nfs_reqq
, r_chain
) {
3248 if (req
->r_nmrep
.nmc_mhead
|| (rxid
!= R_XID32(req
->r_xid
)))
3250 /* looks like we have it, grab lock and double check */
3251 lck_mtx_lock(&req
->r_mtx
);
3252 if (req
->r_nmrep
.nmc_mhead
|| (rxid
!= R_XID32(req
->r_xid
))) {
3253 lck_mtx_unlock(&req
->r_mtx
);
3257 req
->r_nmrep
= nmrep
;
3258 lck_mtx_lock(&nmp
->nm_lock
);
3259 if (nmp
->nm_sotype
== SOCK_DGRAM
) {
3261 * Update congestion window.
3262 * Do the additive increase of one rpc/rtt.
3264 FSDBG(530, R_XID32(req
->r_xid
), req
, nmp
->nm_sent
, nmp
->nm_cwnd
);
3265 if (nmp
->nm_cwnd
<= nmp
->nm_sent
) {
3267 ((NFS_CWNDSCALE
* NFS_CWNDSCALE
) +
3268 (nmp
->nm_cwnd
>> 1)) / nmp
->nm_cwnd
;
3269 if (nmp
->nm_cwnd
> NFS_MAXCWND
)
3270 nmp
->nm_cwnd
= NFS_MAXCWND
;
3272 if (req
->r_flags
& R_CWND
) {
3273 nmp
->nm_sent
-= NFS_CWNDSCALE
;
3274 req
->r_flags
&= ~R_CWND
;
3276 if ((nmp
->nm_sent
< nmp
->nm_cwnd
) && !TAILQ_EMPTY(&nmp
->nm_cwndq
)) {
3277 /* congestion window is open, poke the cwnd queue */
3278 struct nfsreq
*req2
= TAILQ_FIRST(&nmp
->nm_cwndq
);
3279 TAILQ_REMOVE(&nmp
->nm_cwndq
, req2
, r_cchain
);
3280 req2
->r_cchain
.tqe_next
= NFSREQNOLIST
;
3285 * Update rtt using a gain of 0.125 on the mean
3286 * and a gain of 0.25 on the deviation.
3288 if (req
->r_flags
& R_TIMING
) {
3290 * Since the timer resolution of
3291 * NFS_HZ is so course, it can often
3292 * result in r_rtt == 0. Since
3293 * r_rtt == N means that the actual
3294 * rtt is between N+dt and N+2-dt ticks,
3297 if (proct
[req
->r_procnum
] == 0)
3298 panic("nfs_request_match_reply: proct[%d] is zero", req
->r_procnum
);
3299 t1
= req
->r_rtt
+ 1;
3300 t1
-= (NFS_SRTT(req
) >> 3);
3301 NFS_SRTT(req
) += t1
;
3304 t1
-= (NFS_SDRTT(req
) >> 2);
3305 NFS_SDRTT(req
) += t1
;
3307 nmp
->nm_timeouts
= 0;
3308 lck_mtx_unlock(&nmp
->nm_lock
);
3309 /* signal anyone waiting on this request */
3311 asyncioq
= (req
->r_callback
.rcb_func
!= NULL
);
3312 if (nfs_request_using_gss(req
))
3313 nfs_gss_clnt_rpcdone(req
);
3314 lck_mtx_unlock(&req
->r_mtx
);
3315 lck_mtx_unlock(nfs_request_mutex
);
3316 /* if it's an async RPC with a callback, queue it up */
3318 nfs_asyncio_finish(req
);
3323 /* not matched to a request, so drop it. */
3324 lck_mtx_unlock(nfs_request_mutex
);
3325 OSAddAtomic64(1, &nfsstats
.rpcunexpected
);
3331 * Wait for the reply for a given request...
3332 * ...potentially resending the request if necessary.
3335 nfs_wait_reply(struct nfsreq
*req
)
3337 struct timespec ts
= { 2, 0 };
3338 int error
= 0, slpflag
, first
= 1;
3340 if (req
->r_nmp
&& NMFLAG(req
->r_nmp
, INTR
) && req
->r_thread
&& !(req
->r_flags
& R_NOINTR
))
3345 lck_mtx_lock(&req
->r_mtx
);
3346 while (!req
->r_nmrep
.nmc_mhead
) {
3347 if ((error
= nfs_sigintr(req
->r_nmp
, req
, first
? NULL
: req
->r_thread
, 0)))
3349 if (((error
= req
->r_error
)) || req
->r_nmrep
.nmc_mhead
)
3351 /* check if we need to resend */
3352 if (req
->r_flags
& R_MUSTRESEND
) {
3353 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
3354 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
));
3355 req
->r_flags
|= R_SENDING
;
3356 lck_mtx_unlock(&req
->r_mtx
);
3357 if (nfs_request_using_gss(req
)) {
3359 * It's an RPCSEC_GSS request.
3360 * Can't just resend the original request
3361 * without bumping the cred sequence number.
3362 * Go back and re-build the request.
3364 lck_mtx_lock(&req
->r_mtx
);
3365 req
->r_flags
&= ~R_SENDING
;
3366 lck_mtx_unlock(&req
->r_mtx
);
3369 error
= nfs_send(req
, 1);
3370 lck_mtx_lock(&req
->r_mtx
);
3371 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
3372 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
, error
));
3375 if (((error
= req
->r_error
)) || req
->r_nmrep
.nmc_mhead
)
3378 /* need to poll if we're P_NOREMOTEHANG */
3379 if (nfs_noremotehang(req
->r_thread
))
3381 msleep(req
, &req
->r_mtx
, slpflag
| (PZERO
- 1), "nfswaitreply", &ts
);
3382 first
= slpflag
= 0;
3384 lck_mtx_unlock(&req
->r_mtx
);
/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
3412 * Set up an NFS request struct (allocating if no request passed in).
3417 mount_t mp
, /* used only if !np */
3418 struct nfsm_chain
*nmrest
,
3422 struct nfsreq
**reqp
)
3424 struct nfsreq
*req
, *newreq
= NULL
;
3425 struct nfsmount
*nmp
;
3429 /* allocate a new NFS request structure */
3430 MALLOC_ZONE(newreq
, struct nfsreq
*, sizeof(*newreq
), M_NFSREQ
, M_WAITOK
);
3432 mbuf_freem(nmrest
->nmc_mhead
);
3433 nmrest
->nmc_mhead
= NULL
;
3439 bzero(req
, sizeof(*req
));
3441 req
->r_flags
= R_ALLOCATED
;
3443 nmp
= VFSTONFS(np
? NFSTOMP(np
) : mp
);
3446 FREE_ZONE(newreq
, sizeof(*newreq
), M_NFSREQ
);
3449 lck_mtx_lock(&nmp
->nm_lock
);
3450 if ((nmp
->nm_state
& (NFSSTA_FORCE
|NFSSTA_TIMEO
)) ==
3451 (NFSSTA_FORCE
|NFSSTA_TIMEO
)) {
3452 lck_mtx_unlock(&nmp
->nm_lock
);
3453 mbuf_freem(nmrest
->nmc_mhead
);
3454 nmrest
->nmc_mhead
= NULL
;
3456 FREE_ZONE(newreq
, sizeof(*newreq
), M_NFSREQ
);
3460 if ((nmp
->nm_vers
!= NFS_VER4
) && (procnum
>= 0) && (procnum
< NFS_NPROCS
))
3461 OSAddAtomic64(1, &nfsstats
.rpccnt
[procnum
]);
3462 if ((nmp
->nm_vers
== NFS_VER4
) && (procnum
!= NFSPROC4_COMPOUND
) && (procnum
!= NFSPROC4_NULL
))
3463 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum
);
3465 lck_mtx_init(&req
->r_mtx
, nfs_request_grp
, LCK_ATTR_NULL
);
3468 req
->r_thread
= thd
;
3470 req
->r_flags
|= R_NOINTR
;
3471 if (IS_VALID_CRED(cred
)) {
3472 kauth_cred_ref(cred
);
3475 req
->r_procnum
= procnum
;
3476 if (proct
[procnum
] > 0)
3477 req
->r_flags
|= R_TIMING
;
3478 req
->r_nmrep
.nmc_mhead
= NULL
;
3479 SLIST_INIT(&req
->r_gss_seqlist
);
3480 req
->r_achain
.tqe_next
= NFSREQNOLIST
;
3481 req
->r_rchain
.tqe_next
= NFSREQNOLIST
;
3482 req
->r_cchain
.tqe_next
= NFSREQNOLIST
;
3484 /* set auth flavor to use for request */
3486 req
->r_auth
= RPCAUTH_NONE
;
3487 else if (req
->r_np
&& (req
->r_np
->n_auth
!= RPCAUTH_INVALID
))
3488 req
->r_auth
= req
->r_np
->n_auth
;
3490 req
->r_auth
= nmp
->nm_auth
;
3492 lck_mtx_unlock(&nmp
->nm_lock
);
3494 /* move the request mbuf chain to the nfsreq */
3495 req
->r_mrest
= nmrest
->nmc_mhead
;
3496 nmrest
->nmc_mhead
= NULL
;
3498 req
->r_flags
|= R_INITTED
;
3506 * Clean up and free an NFS request structure.
3509 nfs_request_destroy(struct nfsreq
*req
)
3511 struct nfsmount
*nmp
= req
->r_np
? NFSTONMP(req
->r_np
) : req
->r_nmp
;
3512 struct gss_seq
*gsp
, *ngsp
;
3513 struct timespec ts
= { 1, 0 };
3514 int clearjbtimeo
= 0;
3516 if (!req
|| !(req
->r_flags
& R_INITTED
))
3518 req
->r_flags
&= ~R_INITTED
;
3519 if (req
->r_lflags
& RL_QUEUED
)
3520 nfs_reqdequeue(req
);
3521 if (req
->r_achain
.tqe_next
!= NFSREQNOLIST
) {
3522 /* still on an async I/O queue? */
3523 lck_mtx_lock(nfsiod_mutex
);
3524 if (nmp
&& (req
->r_achain
.tqe_next
!= NFSREQNOLIST
)) {
3525 TAILQ_REMOVE(&nmp
->nm_iodq
, req
, r_achain
);
3526 req
->r_achain
.tqe_next
= NFSREQNOLIST
;
3528 lck_mtx_unlock(nfsiod_mutex
);
3530 lck_mtx_lock(&req
->r_mtx
);
3532 lck_mtx_lock(&nmp
->nm_lock
);
3533 if (req
->r_flags
& R_CWND
) {
3534 /* Decrement the outstanding request count. */
3535 req
->r_flags
&= ~R_CWND
;
3536 nmp
->nm_sent
-= NFS_CWNDSCALE
;
3537 if ((nmp
->nm_sent
< nmp
->nm_cwnd
) && !TAILQ_EMPTY(&nmp
->nm_cwndq
)) {
3538 /* congestion window is open, poke the cwnd queue */
3539 struct nfsreq
*req2
= TAILQ_FIRST(&nmp
->nm_cwndq
);
3540 TAILQ_REMOVE(&nmp
->nm_cwndq
, req2
, r_cchain
);
3541 req2
->r_cchain
.tqe_next
= NFSREQNOLIST
;
3545 if (req
->r_rchain
.tqe_next
!= NFSREQNOLIST
) {
3546 TAILQ_REMOVE(&nmp
->nm_resendq
, req
, r_rchain
);
3547 req
->r_rchain
.tqe_next
= NFSREQNOLIST
;
3548 if (req
->r_flags
& R_RESENDQ
)
3549 req
->r_flags
&= ~R_RESENDQ
;
3551 if (req
->r_cchain
.tqe_next
!= NFSREQNOLIST
) {
3552 TAILQ_REMOVE(&nmp
->nm_cwndq
, req
, r_cchain
);
3553 req
->r_cchain
.tqe_next
= NFSREQNOLIST
;
3555 if (req
->r_flags
& R_JBTPRINTFMSG
) {
3556 req
->r_flags
&= ~R_JBTPRINTFMSG
;
3558 clearjbtimeo
= (nmp
->nm_jbreqs
== 0) ? NFSSTA_JUKEBOXTIMEO
: 0;
3560 lck_mtx_unlock(&nmp
->nm_lock
);
3562 while (req
->r_flags
& R_RESENDQ
)
3563 msleep(req
, &req
->r_mtx
, (PZERO
- 1), "nfsresendqwait", &ts
);
3564 lck_mtx_unlock(&req
->r_mtx
);
3566 nfs_up(nmp
, req
->r_thread
, clearjbtimeo
, NULL
);
3568 mbuf_freem(req
->r_mhead
);
3569 else if (req
->r_mrest
)
3570 mbuf_freem(req
->r_mrest
);
3571 if (req
->r_nmrep
.nmc_mhead
)
3572 mbuf_freem(req
->r_nmrep
.nmc_mhead
);
3573 if (IS_VALID_CRED(req
->r_cred
))
3574 kauth_cred_unref(&req
->r_cred
);
3575 if (nfs_request_using_gss(req
))
3576 nfs_gss_clnt_rpcdone(req
);
3577 SLIST_FOREACH_SAFE(gsp
, &req
->r_gss_seqlist
, gss_seqnext
, ngsp
)
3580 nfs_gss_clnt_ctx_unref(req
);
3581 if (req
->r_wrongsec
)
3582 FREE(req
->r_wrongsec
, M_TEMP
);
3584 lck_mtx_destroy(&req
->r_mtx
, nfs_request_grp
);
3585 if (req
->r_flags
& R_ALLOCATED
)
3586 FREE_ZONE(req
, sizeof(*req
), M_NFSREQ
);
3590 nfs_request_ref(struct nfsreq
*req
, int locked
)
3593 lck_mtx_lock(&req
->r_mtx
);
3594 if (req
->r_refs
<= 0)
3595 panic("nfsreq reference error");
3598 lck_mtx_unlock(&req
->r_mtx
);
3602 nfs_request_rele(struct nfsreq
*req
)
3606 lck_mtx_lock(&req
->r_mtx
);
3607 if (req
->r_refs
<= 0)
3608 panic("nfsreq reference underflow");
3610 destroy
= (req
->r_refs
== 0);
3611 lck_mtx_unlock(&req
->r_mtx
);
3613 nfs_request_destroy(req
);
3618 * Add an (updated) RPC header with authorization to an NFS request.
3621 nfs_request_add_header(struct nfsreq
*req
)
3623 struct nfsmount
*nmp
;
3627 /* free up any previous header */
3628 if ((m
= req
->r_mhead
)) {
3629 while (m
&& (m
!= req
->r_mrest
))
3631 req
->r_mhead
= NULL
;
3634 nmp
= req
->r_np
? NFSTONMP(req
->r_np
) : req
->r_nmp
;
3638 error
= nfsm_rpchead(req
, req
->r_mrest
, &req
->r_xid
, &req
->r_mhead
);
3642 req
->r_mreqlen
= mbuf_pkthdr_len(req
->r_mhead
);
3643 nmp
= req
->r_np
? NFSTONMP(req
->r_np
) : req
->r_nmp
;
3646 lck_mtx_lock(&nmp
->nm_lock
);
3647 if (NMFLAG(nmp
, SOFT
))
3648 req
->r_retry
= nmp
->nm_retry
;
3650 req
->r_retry
= NFS_MAXREXMIT
+ 1; /* past clip limit */
3651 lck_mtx_unlock(&nmp
->nm_lock
);
3658 * Queue an NFS request up and send it out.
3661 nfs_request_send(struct nfsreq
*req
, int wait
)
3663 struct nfsmount
*nmp
;
3666 lck_mtx_lock(&req
->r_mtx
);
3667 req
->r_flags
|= R_SENDING
;
3668 lck_mtx_unlock(&req
->r_mtx
);
3670 lck_mtx_lock(nfs_request_mutex
);
3672 nmp
= req
->r_np
? NFSTONMP(req
->r_np
) : req
->r_nmp
;
3674 lck_mtx_unlock(nfs_request_mutex
);
3679 if (!req
->r_start
) {
3680 req
->r_start
= now
.tv_sec
;
3681 req
->r_lastmsg
= now
.tv_sec
-
3682 ((nmp
->nm_tprintf_delay
) - (nmp
->nm_tprintf_initial_delay
));
3685 OSAddAtomic64(1, &nfsstats
.rpcrequests
);
3688 * Chain request into list of outstanding requests. Be sure
3689 * to put it LAST so timer finds oldest requests first.
3690 * Make sure that the request queue timer is running
3691 * to check for possible request timeout.
3693 TAILQ_INSERT_TAIL(&nfs_reqq
, req
, r_chain
);
3694 req
->r_lflags
|= RL_QUEUED
;
3695 if (!nfs_request_timer_on
) {
3696 nfs_request_timer_on
= 1;
3697 nfs_interval_timer_start(nfs_request_timer_call
,
3700 lck_mtx_unlock(nfs_request_mutex
);
3702 /* Send the request... */
3703 return (nfs_send(req
, wait
));
3707 * Call nfs_wait_reply() to wait for the reply.
3710 nfs_request_wait(struct nfsreq
*req
)
3712 req
->r_error
= nfs_wait_reply(req
);
3716 * Finish up an NFS request by dequeueing it and
3717 * doing the initial NFS request reply processing.
3722 struct nfsm_chain
*nmrepp
,
3725 struct nfsmount
*nmp
;
3728 uint32_t verf_len
= 0;
3729 uint32_t reply_status
= 0;
3730 uint32_t rejected_status
= 0;
3731 uint32_t auth_status
= 0;
3732 uint32_t accepted_status
= 0;
3733 struct nfsm_chain nmrep
;
3734 int error
, clearjbtimeo
;
3736 error
= req
->r_error
;
3739 nmrepp
->nmc_mhead
= NULL
;
3741 /* RPC done, unlink the request. */
3742 nfs_reqdequeue(req
);
3744 mrep
= req
->r_nmrep
.nmc_mhead
;
3746 nmp
= req
->r_np
? NFSTONMP(req
->r_np
) : req
->r_nmp
;
3748 if ((req
->r_flags
& R_CWND
) && nmp
) {
3750 * Decrement the outstanding request count.
3752 req
->r_flags
&= ~R_CWND
;
3753 lck_mtx_lock(&nmp
->nm_lock
);
3754 FSDBG(273, R_XID32(req
->r_xid
), req
, nmp
->nm_sent
, nmp
->nm_cwnd
);
3755 nmp
->nm_sent
-= NFS_CWNDSCALE
;
3756 if ((nmp
->nm_sent
< nmp
->nm_cwnd
) && !TAILQ_EMPTY(&nmp
->nm_cwndq
)) {
3757 /* congestion window is open, poke the cwnd queue */
3758 struct nfsreq
*req2
= TAILQ_FIRST(&nmp
->nm_cwndq
);
3759 TAILQ_REMOVE(&nmp
->nm_cwndq
, req2
, r_cchain
);
3760 req2
->r_cchain
.tqe_next
= NFSREQNOLIST
;
3763 lck_mtx_unlock(&nmp
->nm_lock
);
3766 if (nfs_request_using_gss(req
)) {
3768 * If the request used an RPCSEC_GSS credential
3769 * then reset its sequence number bit in the
3772 nfs_gss_clnt_rpcdone(req
);
3775 * If we need to re-send, go back and re-build the
3776 * request based on a new sequence number.
3777 * Note that we're using the original XID.
3779 if (error
== EAGAIN
) {
3783 error
= nfs_gss_clnt_args_restore(req
); // remove any trailer mbufs
3784 req
->r_nmrep
.nmc_mhead
= NULL
;
3785 req
->r_flags
|= R_RESTART
;
3786 if (error
== ENEEDAUTH
) {
3787 req
->r_xid
= 0; // get a new XID
3795 * If there was a successful reply, make sure to mark the mount as up.
3796 * If a tprintf message was given (or if this is a timed-out soft mount)
3797 * then post a tprintf message indicating the server is alive again.
3800 if ((req
->r_flags
& R_TPRINTFMSG
) ||
3801 (nmp
&& NMFLAG(nmp
, SOFT
) &&
3802 ((nmp
->nm_state
& (NFSSTA_TIMEO
|NFSSTA_FORCE
)) == NFSSTA_TIMEO
)))
3803 nfs_up(nmp
, req
->r_thread
, NFSSTA_TIMEO
, "is alive again");
3805 nfs_up(nmp
, req
->r_thread
, NFSSTA_TIMEO
, NULL
);
3812 * break down the RPC header and check if ok
3814 nmrep
= req
->r_nmrep
;
3815 nfsm_chain_get_32(error
, &nmrep
, reply_status
);
3817 if (reply_status
== RPC_MSGDENIED
) {
3818 nfsm_chain_get_32(error
, &nmrep
, rejected_status
);
3820 if (rejected_status
== RPC_MISMATCH
) {
3824 nfsm_chain_get_32(error
, &nmrep
, auth_status
);
3826 switch (auth_status
) {
3827 case RPCSEC_GSS_CREDPROBLEM
:
3828 case RPCSEC_GSS_CTXPROBLEM
:
3830 * An RPCSEC_GSS cred or context problem.
3831 * We can't use it anymore.
3832 * Restore the args, renew the context
3833 * and set up for a resend.
3835 error
= nfs_gss_clnt_args_restore(req
);
3836 if (error
&& error
!= ENEEDAUTH
)
3840 error
= nfs_gss_clnt_ctx_renew(req
);
3845 req
->r_nmrep
.nmc_mhead
= NULL
;
3846 req
->r_xid
= 0; // get a new XID
3847 req
->r_flags
|= R_RESTART
;
3856 /* Now check the verifier */
3857 nfsm_chain_get_32(error
, &nmrep
, verf_type
); // verifier flavor
3858 nfsm_chain_get_32(error
, &nmrep
, verf_len
); // verifier length
3861 switch (req
->r_auth
) {
3864 /* Any AUTH_SYS verifier is ignored */
3866 nfsm_chain_adv(error
, &nmrep
, nfsm_rndup(verf_len
));
3867 nfsm_chain_get_32(error
, &nmrep
, accepted_status
);
3872 error
= nfs_gss_clnt_verf_get(req
, &nmrep
,
3873 verf_type
, verf_len
, &accepted_status
);
3878 switch (accepted_status
) {
3880 if (req
->r_procnum
== NFSPROC_NULL
) {
3882 * The NFS null procedure is unique,
3883 * in not returning an NFS status.
3887 nfsm_chain_get_32(error
, &nmrep
, *status
);
3891 if ((nmp
->nm_vers
!= NFS_VER2
) && (*status
== NFSERR_TRYLATER
)) {
3893 * It's a JUKEBOX error - delay and try again
3895 int delay
, slpflag
= (NMFLAG(nmp
, INTR
) && !(req
->r_flags
& R_NOINTR
)) ? PCATCH
: 0;
3898 req
->r_nmrep
.nmc_mhead
= NULL
;
3899 if ((req
->r_delay
>= 30) && !(nmp
->nm_state
& NFSSTA_MOUNTED
)) {
3900 /* we're not yet completely mounted and */
3901 /* we can't complete an RPC, so we fail */
3902 OSAddAtomic64(1, &nfsstats
.rpctimeouts
);
3904 error
= req
->r_error
;
3907 req
->r_delay
= !req
->r_delay
? NFS_TRYLATERDEL
: (req
->r_delay
* 2);
3908 if (req
->r_delay
> 30)
3910 if (nmp
->nm_tprintf_initial_delay
&& (req
->r_delay
>= nmp
->nm_tprintf_initial_delay
)) {
3911 if (!(req
->r_flags
& R_JBTPRINTFMSG
)) {
3912 req
->r_flags
|= R_JBTPRINTFMSG
;
3913 lck_mtx_lock(&nmp
->nm_lock
);
3915 lck_mtx_unlock(&nmp
->nm_lock
);
3917 nfs_down(req
->r_nmp
, req
->r_thread
, 0, NFSSTA_JUKEBOXTIMEO
,
3918 "resource temporarily unavailable (jukebox)");
3920 if (NMFLAG(nmp
, SOFT
) && (req
->r_delay
== 30) && !(req
->r_flags
& R_NOINTR
)) {
3921 /* for soft mounts, just give up after a short while */
3922 OSAddAtomic64(1, &nfsstats
.rpctimeouts
);
3924 error
= req
->r_error
;
3927 delay
= req
->r_delay
;
3928 if (req
->r_callback
.rcb_func
) {
3931 req
->r_resendtime
= now
.tv_sec
+ delay
;
3934 if ((error
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 0)))
3936 tsleep(&lbolt
, PSOCK
|slpflag
, "nfs_jukebox_trylater", 0);
3938 } while (--delay
> 0);
3940 req
->r_xid
= 0; // get a new XID
3941 req
->r_flags
|= R_RESTART
;
3943 FSDBG(273, R_XID32(req
->r_xid
), nmp
, req
, NFSERR_TRYLATER
);
3947 if (req
->r_flags
& R_JBTPRINTFMSG
) {
3948 req
->r_flags
&= ~R_JBTPRINTFMSG
;
3949 lck_mtx_lock(&nmp
->nm_lock
);
3951 clearjbtimeo
= (nmp
->nm_jbreqs
== 0) ? NFSSTA_JUKEBOXTIMEO
: 0;
3952 lck_mtx_unlock(&nmp
->nm_lock
);
3953 nfs_up(nmp
, req
->r_thread
, clearjbtimeo
, "resource available again");
3956 if ((nmp
->nm_vers
>= NFS_VER4
) && (*status
== NFSERR_WRONGSEC
)) {
3958 * Hmmm... we need to try a different security flavor.
3959 * The first time a request hits this, we will allocate an array
3960 * to track flavors to try. We fill the array with the mount's
3961 * preferred flavors or the server's preferred flavors or just the
3962 * flavors we support.
3964 uint32_t srvflavors
[NX_MAX_SEC_FLAVORS
];
3967 /* Call SECINFO to try to get list of flavors from server. */
3968 srvcount
= NX_MAX_SEC_FLAVORS
;
3969 nfs4_secinfo_rpc(nmp
, &req
->r_secinfo
, req
->r_cred
, srvflavors
, &srvcount
);
3971 if (!req
->r_wrongsec
) {
3972 /* first time... set up flavor array */
3973 MALLOC(req
->r_wrongsec
, uint32_t*, NX_MAX_SEC_FLAVORS
*sizeof(uint32_t), M_TEMP
, M_WAITOK
);
3974 if (!req
->r_wrongsec
) {
3979 if (nmp
->nm_sec
.count
) { /* use the mount's preferred list of flavors */
3980 for(; i
< nmp
->nm_sec
.count
; i
++)
3981 req
->r_wrongsec
[i
] = nmp
->nm_sec
.flavors
[i
];
3982 } else if (srvcount
) { /* otherwise use the server's list of flavors */
3983 for(; i
< srvcount
; i
++)
3984 req
->r_wrongsec
[i
] = srvflavors
[i
];
3985 } else { /* otherwise, just try the flavors we support. */
3986 req
->r_wrongsec
[i
++] = RPCAUTH_KRB5P
;
3987 req
->r_wrongsec
[i
++] = RPCAUTH_KRB5I
;
3988 req
->r_wrongsec
[i
++] = RPCAUTH_KRB5
;
3989 req
->r_wrongsec
[i
++] = RPCAUTH_SYS
;
3990 req
->r_wrongsec
[i
++] = RPCAUTH_NONE
;
3992 for(; i
< NX_MAX_SEC_FLAVORS
; i
++) /* invalidate any remaining slots */
3993 req
->r_wrongsec
[i
] = RPCAUTH_INVALID
;
3996 /* clear the current flavor from the list */
3997 for(i
=0; i
< NX_MAX_SEC_FLAVORS
; i
++)
3998 if (req
->r_wrongsec
[i
] == req
->r_auth
)
3999 req
->r_wrongsec
[i
] = RPCAUTH_INVALID
;
4001 /* find the next flavor to try */
4002 for(i
=0; i
< NX_MAX_SEC_FLAVORS
; i
++)
4003 if (req
->r_wrongsec
[i
] != RPCAUTH_INVALID
) {
4004 if (((req
->r_wrongsec
[i
] == RPCAUTH_KRB5P
) ||
4005 (req
->r_wrongsec
[i
] == RPCAUTH_KRB5I
) ||
4006 (req
->r_wrongsec
[i
] == RPCAUTH_KRB5
)) && (req
->r_gss_ctx
&&
4007 (req
->r_gss_ctx
->gss_clnt_service
== RPCSEC_GSS_SVC_SYS
))) {
4008 /* don't bother trying Kerberos if we've already got a fallback context */
4009 req
->r_wrongsec
[i
] = RPCAUTH_INVALID
;
4012 if (!srvcount
) /* no server list, just try it */
4014 /* check that it's in the server's list */
4015 for(j
=0; j
< srvcount
; j
++)
4016 if (req
->r_wrongsec
[i
] == srvflavors
[j
])
4018 if (j
< srvcount
) /* found */
4020 /* not found in server list */
4021 req
->r_wrongsec
[i
] = RPCAUTH_INVALID
;
4023 if (i
== NX_MAX_SEC_FLAVORS
) {
4024 /* nothing left to try! */
4029 /* retry with the next auth flavor */
4030 req
->r_auth
= req
->r_wrongsec
[i
];
4031 req
->r_xid
= 0; // get a new XID
4032 req
->r_flags
|= R_RESTART
;
4034 FSDBG(273, R_XID32(req
->r_xid
), nmp
, req
, NFSERR_WRONGSEC
);
4037 if ((nmp
->nm_vers
>= NFS_VER4
) && req
->r_wrongsec
) {
4039 * We renegotiated security for this request; so update the
4040 * default security flavor for the associated node.
4043 req
->r_np
->n_auth
= req
->r_auth
;
4046 if (*status
== NFS_OK
) {
4048 * Successful NFS request
4051 req
->r_nmrep
.nmc_mhead
= NULL
;
4054 /* Got an NFS error of some kind */
4057 * If the File Handle was stale, invalidate the
4058 * lookup cache, just in case.
4060 if ((*status
== ESTALE
) && req
->r_np
) {
4061 cache_purge(NFSTOV(req
->r_np
));
4062 /* if monitored, also send delete event */
4063 if (vnode_ismonitored(NFSTOV(req
->r_np
)))
4064 nfs_vnode_notify(req
->r_np
, (VNODE_EVENT_ATTRIB
|VNODE_EVENT_DELETE
));
4066 if (nmp
->nm_vers
== NFS_VER2
)
4070 req
->r_nmrep
.nmc_mhead
= NULL
;
4073 case RPC_PROGUNAVAIL
:
4074 error
= EPROGUNAVAIL
;
4076 case RPC_PROGMISMATCH
:
4077 error
= ERPCMISMATCH
;
4079 case RPC_PROCUNAVAIL
:
4080 error
= EPROCUNAVAIL
;
4085 case RPC_SYSTEM_ERR
:
4091 if (req
->r_flags
& R_JBTPRINTFMSG
) {
4092 req
->r_flags
&= ~R_JBTPRINTFMSG
;
4093 lck_mtx_lock(&nmp
->nm_lock
);
4095 clearjbtimeo
= (nmp
->nm_jbreqs
== 0) ? NFSSTA_JUKEBOXTIMEO
: 0;
4096 lck_mtx_unlock(&nmp
->nm_lock
);
4098 nfs_up(nmp
, req
->r_thread
, clearjbtimeo
, NULL
);
4100 FSDBG(273, R_XID32(req
->r_xid
), nmp
, req
,
4101 (!error
&& (*status
== NFS_OK
)) ? 0xf0f0f0f0 : error
);
4106 * NFS request using a GSS/Kerberos security flavor?
4109 nfs_request_using_gss(struct nfsreq
*req
)
4111 if (!req
->r_gss_ctx
)
4113 switch (req
->r_auth
) {
4123 * Perform an NFS request synchronously.
4129 mount_t mp
, /* used only if !np */
4130 struct nfsm_chain
*nmrest
,
4133 struct nfsreq_secinfo_args
*si
,
4134 struct nfsm_chain
*nmrepp
,
4138 return nfs_request2(np
, mp
, nmrest
, procnum
,
4139 vfs_context_thread(ctx
), vfs_context_ucred(ctx
),
4140 si
, 0, nmrepp
, xidp
, status
);
4146 mount_t mp
, /* used only if !np */
4147 struct nfsm_chain
*nmrest
,
4151 struct nfsreq_secinfo_args
*si
,
4153 struct nfsm_chain
*nmrepp
,
4157 struct nfsreq rq
, *req
= &rq
;
4160 if ((error
= nfs_request_create(np
, mp
, nmrest
, procnum
, thd
, cred
, &req
)))
4162 req
->r_flags
|= (flags
& R_OPTMASK
);
4164 req
->r_secinfo
= *si
;
4166 FSDBG_TOP(273, R_XID32(req
->r_xid
), np
, procnum
, 0);
4169 req
->r_flags
&= ~R_RESTART
;
4170 if ((error
= nfs_request_add_header(req
)))
4174 if ((error
= nfs_request_send(req
, 1)))
4176 nfs_request_wait(req
);
4177 if ((error
= nfs_request_finish(req
, nmrepp
, status
)))
4179 } while (req
->r_flags
& R_RESTART
);
4181 FSDBG_BOT(273, R_XID32(req
->r_xid
), np
, procnum
, error
);
4182 nfs_request_rele(req
);
4188 * Set up a new null proc request to exchange GSS context tokens with the
4189 * server. Associate the context that we are setting up with the request that we
4196 struct nfsm_chain
*nmrest
,
4200 struct nfs_gss_clnt_ctx
*cp
, /* Set to gss context to renew or setup */
4201 struct nfsm_chain
*nmrepp
,
4204 struct nfsreq rq
, *req
= &rq
;
4207 if ((error
= nfs_request_create(NULL
, mp
, nmrest
, NFSPROC_NULL
, thd
, cred
, &req
)))
4209 req
->r_flags
|= (flags
& R_OPTMASK
);
4212 printf("nfs_request_gss request has no context\n");
4213 nfs_request_rele(req
);
4214 return (NFSERR_EAUTH
);
4216 nfs_gss_clnt_ctx_ref(req
, cp
);
4218 FSDBG_TOP(273, R_XID32(req
->r_xid
), NULL
, NFSPROC_NULL
, 0);
4221 req
->r_flags
&= ~R_RESTART
;
4222 if ((error
= nfs_request_add_header(req
)))
4225 if ((error
= nfs_request_send(req
, 1)))
4227 nfs_request_wait(req
);
4228 if ((error
= nfs_request_finish(req
, nmrepp
, status
)))
4230 } while (req
->r_flags
& R_RESTART
);
4232 FSDBG_BOT(273, R_XID32(req
->r_xid
), NULL
, NFSPROC_NULL
, error
);
4233 nfs_request_rele(req
);
4238 * Create and start an asynchronous NFS request.
4243 mount_t mp
, /* used only if !np */
4244 struct nfsm_chain
*nmrest
,
4248 struct nfsreq_secinfo_args
*si
,
4250 struct nfsreq_cbinfo
*cb
,
4251 struct nfsreq
**reqp
)
4254 struct nfsmount
*nmp
;
4257 error
= nfs_request_create(np
, mp
, nmrest
, procnum
, thd
, cred
, reqp
);
4259 FSDBG(274, (req
? R_XID32(req
->r_xid
) : 0), np
, procnum
, error
);
4262 req
->r_flags
|= (flags
& R_OPTMASK
);
4263 req
->r_flags
|= R_ASYNC
;
4265 req
->r_secinfo
= *si
;
4267 req
->r_callback
= *cb
;
4268 error
= nfs_request_add_header(req
);
4270 req
->r_flags
|= R_WAITSENT
;
4271 if (req
->r_callback
.rcb_func
)
4272 nfs_request_ref(req
, 0);
4273 error
= nfs_request_send(req
, 1);
4274 lck_mtx_lock(&req
->r_mtx
);
4275 if (!error
&& !(req
->r_flags
& R_SENT
) && req
->r_callback
.rcb_func
) {
4276 /* make sure to wait until this async I/O request gets sent */
4277 int slpflag
= (req
->r_nmp
&& NMFLAG(req
->r_nmp
, INTR
) && req
->r_thread
&& !(req
->r_flags
& R_NOINTR
)) ? PCATCH
: 0;
4278 struct timespec ts
= { 2, 0 };
4279 while (!(req
->r_flags
& R_SENT
)) {
4280 if ((req
->r_flags
& R_RESENDQ
) && ((nmp
= req
->r_nmp
))) {
4281 lck_mtx_lock(&nmp
->nm_lock
);
4282 if ((nmp
->nm_state
& NFSSTA_RECOVER
) && (req
->r_rchain
.tqe_next
!= NFSREQNOLIST
)) {
4284 * It's not going to get off the resend queue if we're in recovery.
4285 * So, just take it off ourselves. We could be holding mount state
4286 * busy and thus holding up the start of recovery.
4288 TAILQ_REMOVE(&nmp
->nm_resendq
, req
, r_rchain
);
4289 req
->r_rchain
.tqe_next
= NFSREQNOLIST
;
4290 if (req
->r_flags
& R_RESENDQ
)
4291 req
->r_flags
&= ~R_RESENDQ
;
4292 lck_mtx_unlock(&nmp
->nm_lock
);
4293 req
->r_flags
|= R_SENDING
;
4294 lck_mtx_unlock(&req
->r_mtx
);
4295 error
= nfs_send(req
, 1);
4296 lck_mtx_lock(&req
->r_mtx
);
4301 lck_mtx_unlock(&nmp
->nm_lock
);
4303 if ((error
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 0)))
4305 msleep(req
, &req
->r_mtx
, slpflag
| (PZERO
- 1), "nfswaitsent", &ts
);
4309 sent
= req
->r_flags
& R_SENT
;
4310 lck_mtx_unlock(&req
->r_mtx
);
4311 if (error
&& req
->r_callback
.rcb_func
&& !sent
)
4312 nfs_request_rele(req
);
4314 FSDBG(274, R_XID32(req
->r_xid
), np
, procnum
, error
);
4315 if (error
|| req
->r_callback
.rcb_func
)
4316 nfs_request_rele(req
);
4321 * Wait for and finish an asynchronous NFS request.
4324 nfs_request_async_finish(
4326 struct nfsm_chain
*nmrepp
,
4330 int error
= 0, asyncio
= req
->r_callback
.rcb_func
? 1 : 0;
4331 struct nfsmount
*nmp
;
4333 lck_mtx_lock(&req
->r_mtx
);
4335 req
->r_flags
|= R_ASYNCWAIT
;
4336 while (req
->r_flags
& R_RESENDQ
) { /* wait until the request is off the resend queue */
4337 struct timespec ts
= { 2, 0 };
4338 if ((nmp
= req
->r_nmp
)) {
4339 lck_mtx_lock(&nmp
->nm_lock
);
4340 if ((nmp
->nm_state
& NFSSTA_RECOVER
) && (req
->r_rchain
.tqe_next
!= NFSREQNOLIST
)) {
4342 * It's not going to get off the resend queue if we're in recovery.
4343 * So, just take it off ourselves. We could be holding mount state
4344 * busy and thus holding up the start of recovery.
4346 TAILQ_REMOVE(&nmp
->nm_resendq
, req
, r_rchain
);
4347 req
->r_rchain
.tqe_next
= NFSREQNOLIST
;
4348 if (req
->r_flags
& R_RESENDQ
)
4349 req
->r_flags
&= ~R_RESENDQ
;
4350 lck_mtx_unlock(&nmp
->nm_lock
);
4353 lck_mtx_unlock(&nmp
->nm_lock
);
4355 if ((error
= nfs_sigintr(req
->r_nmp
, req
, req
->r_thread
, 0)))
4357 msleep(req
, &req
->r_mtx
, PZERO
-1, "nfsresendqwait", &ts
);
4359 lck_mtx_unlock(&req
->r_mtx
);
4362 nfs_request_wait(req
);
4363 error
= nfs_request_finish(req
, nmrepp
, status
);
4366 while (!error
&& (req
->r_flags
& R_RESTART
)) {
4367 if (asyncio
&& req
->r_resendtime
) { /* send later */
4368 lck_mtx_lock(&req
->r_mtx
);
4369 nfs_asyncio_resend(req
);
4370 lck_mtx_unlock(&req
->r_mtx
);
4371 return (EINPROGRESS
);
4374 req
->r_flags
&= ~R_RESTART
;
4375 if ((error
= nfs_request_add_header(req
)))
4377 if ((error
= nfs_request_send(req
, !asyncio
)))
4380 return (EINPROGRESS
);
4381 nfs_request_wait(req
);
4382 if ((error
= nfs_request_finish(req
, nmrepp
, status
)))
4388 FSDBG(275, R_XID32(req
->r_xid
), req
->r_np
, req
->r_procnum
, error
);
4389 nfs_request_rele(req
);
4394 * Cancel a pending asynchronous NFS request.
4397 nfs_request_async_cancel(struct nfsreq
*req
)
4399 nfs_reqdequeue(req
);
4400 FSDBG(275, R_XID32(req
->r_xid
), req
->r_np
, req
->r_procnum
, 0xD1ED1E);
4401 nfs_request_rele(req
);
4405 * Flag a request as being terminated.
4408 nfs_softterm(struct nfsreq
*req
)
4410 struct nfsmount
*nmp
= req
->r_nmp
;
4411 req
->r_flags
|= R_SOFTTERM
;
4412 req
->r_error
= ETIMEDOUT
;
4413 if (!(req
->r_flags
& R_CWND
) || !nmp
)
4415 /* update congestion window */
4416 req
->r_flags
&= ~R_CWND
;
4417 lck_mtx_lock(&nmp
->nm_lock
);
4418 FSDBG(532, R_XID32(req
->r_xid
), req
, nmp
->nm_sent
, nmp
->nm_cwnd
);
4419 nmp
->nm_sent
-= NFS_CWNDSCALE
;
4420 if ((nmp
->nm_sent
< nmp
->nm_cwnd
) && !TAILQ_EMPTY(&nmp
->nm_cwndq
)) {
4421 /* congestion window is open, poke the cwnd queue */
4422 struct nfsreq
*req2
= TAILQ_FIRST(&nmp
->nm_cwndq
);
4423 TAILQ_REMOVE(&nmp
->nm_cwndq
, req2
, r_cchain
);
4424 req2
->r_cchain
.tqe_next
= NFSREQNOLIST
;
4427 lck_mtx_unlock(&nmp
->nm_lock
);
4431 * Ensure req isn't in use by the timer, then dequeue it.
4434 nfs_reqdequeue(struct nfsreq
*req
)
4436 lck_mtx_lock(nfs_request_mutex
);
4437 while (req
->r_lflags
& RL_BUSY
) {
4438 req
->r_lflags
|= RL_WAITING
;
4439 msleep(&req
->r_lflags
, nfs_request_mutex
, PSOCK
, "reqdeq", NULL
);
4441 if (req
->r_lflags
& RL_QUEUED
) {
4442 TAILQ_REMOVE(&nfs_reqq
, req
, r_chain
);
4443 req
->r_lflags
&= ~RL_QUEUED
;
4445 lck_mtx_unlock(nfs_request_mutex
);
4449 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
4450 * free()'d out from under it.
4453 nfs_reqbusy(struct nfsreq
*req
)
4455 if (req
->r_lflags
& RL_BUSY
)
4456 panic("req locked");
4457 req
->r_lflags
|= RL_BUSY
;
4461 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
4464 nfs_reqnext(struct nfsreq
*req
)
4466 struct nfsreq
* nextreq
;
4471 * We need to get and busy the next req before signalling the
4472 * current one, otherwise wakeup() may block us and we'll race to
4473 * grab the next req.
4475 nextreq
= TAILQ_NEXT(req
, r_chain
);
4476 if (nextreq
!= NULL
)
4477 nfs_reqbusy(nextreq
);
4478 /* unbusy and signal. */
4479 req
->r_lflags
&= ~RL_BUSY
;
4480 if (req
->r_lflags
& RL_WAITING
) {
4481 req
->r_lflags
&= ~RL_WAITING
;
4482 wakeup(&req
->r_lflags
);
4488 * NFS request queue timer routine
4490 * Scan the NFS request queue for any requests that have timed out.
4492 * Alert the system of unresponsive servers.
4493 * Mark expired requests on soft mounts as terminated.
4494 * For UDP, mark/signal requests for retransmission.
4497 nfs_request_timer(__unused
void *param0
, __unused
void *param1
)
4500 struct nfsmount
*nmp
;
4501 int timeo
, maxtime
, finish_asyncio
, error
;
4503 TAILQ_HEAD(nfs_mount_pokeq
, nfsmount
) nfs_mount_poke_queue
;
4505 lck_mtx_lock(nfs_request_mutex
);
4506 req
= TAILQ_FIRST(&nfs_reqq
);
4507 if (req
== NULL
) { /* no requests - turn timer off */
4508 nfs_request_timer_on
= 0;
4509 lck_mtx_unlock(nfs_request_mutex
);
4514 TAILQ_INIT(&nfs_mount_poke_queue
);
4517 for ( ; req
!= NULL
; req
= nfs_reqnext(req
)) {
4519 if (!nmp
) /* unmounted */
4521 if (req
->r_error
|| req
->r_nmrep
.nmc_mhead
)
4523 if ((error
= nfs_sigintr(nmp
, req
, req
->r_thread
, 0))) {
4524 if (req
->r_callback
.rcb_func
!= NULL
) {
4525 /* async I/O RPC needs to be finished */
4526 lck_mtx_lock(&req
->r_mtx
);
4527 req
->r_error
= error
;
4528 finish_asyncio
= !(req
->r_flags
& R_WAITSENT
);
4530 lck_mtx_unlock(&req
->r_mtx
);
4532 nfs_asyncio_finish(req
);
4537 lck_mtx_lock(&req
->r_mtx
);
4539 if (nmp
->nm_tprintf_initial_delay
&&
4540 ((req
->r_rexmit
> 2) || (req
->r_flags
& R_RESENDERR
)) &&
4541 ((req
->r_lastmsg
+ nmp
->nm_tprintf_delay
) < now
.tv_sec
)) {
4542 req
->r_lastmsg
= now
.tv_sec
;
4543 nfs_down(req
->r_nmp
, req
->r_thread
, 0, NFSSTA_TIMEO
,
4545 req
->r_flags
|= R_TPRINTFMSG
;
4546 lck_mtx_lock(&nmp
->nm_lock
);
4547 if (!(nmp
->nm_state
& NFSSTA_MOUNTED
)) {
4548 lck_mtx_unlock(&nmp
->nm_lock
);
4549 /* we're not yet completely mounted and */
4550 /* we can't complete an RPC, so we fail */
4551 OSAddAtomic64(1, &nfsstats
.rpctimeouts
);
4553 finish_asyncio
= ((req
->r_callback
.rcb_func
!= NULL
) && !(req
->r_flags
& R_WAITSENT
));
4555 lck_mtx_unlock(&req
->r_mtx
);
4557 nfs_asyncio_finish(req
);
4560 lck_mtx_unlock(&nmp
->nm_lock
);
4564 * Put a reasonable limit on the maximum timeout,
4565 * and reduce that limit when soft mounts get timeouts or are in reconnect.
4567 if (!NMFLAG(nmp
, SOFT
) && !nfs_can_squish(nmp
))
4568 maxtime
= NFS_MAXTIMEO
;
4569 else if ((req
->r_flags
& (R_SETUP
|R_RECOVER
)) ||
4570 ((nmp
->nm_reconnect_start
<= 0) || ((now
.tv_sec
- nmp
->nm_reconnect_start
) < 8)))
4571 maxtime
= (NFS_MAXTIMEO
/ (nmp
->nm_timeouts
+1))/2;
4573 maxtime
= NFS_MINTIMEO
/4;
4576 * Check for request timeout.
4578 if (req
->r_rtt
>= 0) {
4580 lck_mtx_lock(&nmp
->nm_lock
);
4581 if (req
->r_flags
& R_RESENDERR
) {
4582 /* with resend errors, retry every few seconds */
4585 if (req
->r_procnum
== NFSPROC_NULL
&& req
->r_gss_ctx
!= NULL
)
4586 timeo
= NFS_MINIDEMTIMEO
; // gss context setup
4587 else if (NMFLAG(nmp
, DUMBTIMER
))
4588 timeo
= nmp
->nm_timeo
;
4590 timeo
= NFS_RTO(nmp
, proct
[req
->r_procnum
]);
4592 /* ensure 62.5 ms floor */
4593 while (16 * timeo
< hz
)
4595 if (nmp
->nm_timeouts
> 0)
4596 timeo
*= nfs_backoff
[nmp
->nm_timeouts
- 1];
4598 /* limit timeout to max */
4599 if (timeo
> maxtime
)
4601 if (req
->r_rtt
<= timeo
) {
4602 lck_mtx_unlock(&nmp
->nm_lock
);
4603 lck_mtx_unlock(&req
->r_mtx
);
4606 /* The request has timed out */
4607 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
4608 req
->r_procnum
, proct
[req
->r_procnum
],
4609 req
->r_xid
, req
->r_rtt
, timeo
, nmp
->nm_timeouts
,
4610 (now
.tv_sec
- req
->r_start
)*NFS_HZ
, maxtime
));
4611 if (nmp
->nm_timeouts
< 8)
4613 nfs_mount_check_dead_timeout(nmp
);
4614 /* if it's been a few seconds, try poking the socket */
4615 if ((nmp
->nm_sotype
== SOCK_STREAM
) &&
4616 ((now
.tv_sec
- req
->r_start
) >= 3) &&
4617 !(nmp
->nm_sockflags
& (NMSOCK_POKE
|NMSOCK_UNMOUNT
)) &&
4618 (nmp
->nm_sockflags
& NMSOCK_READY
)) {
4619 nmp
->nm_sockflags
|= NMSOCK_POKE
;
4620 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue
, nmp
, nm_pokeq
);
4622 lck_mtx_unlock(&nmp
->nm_lock
);
4625 /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
4626 if ((NMFLAG(nmp
, SOFT
) || (req
->r_flags
& (R_SETUP
|R_RECOVER
))) &&
4627 ((req
->r_rexmit
>= req
->r_retry
) || /* too many */
4628 ((now
.tv_sec
- req
->r_start
)*NFS_HZ
> maxtime
))) { /* too long */
4629 OSAddAtomic64(1, &nfsstats
.rpctimeouts
);
4630 lck_mtx_lock(&nmp
->nm_lock
);
4631 if (!(nmp
->nm_state
& NFSSTA_TIMEO
)) {
4632 lck_mtx_unlock(&nmp
->nm_lock
);
4633 /* make sure we note the unresponsive server */
4634 /* (maxtime may be less than tprintf delay) */
4635 nfs_down(req
->r_nmp
, req
->r_thread
, 0, NFSSTA_TIMEO
,
4637 req
->r_lastmsg
= now
.tv_sec
;
4638 req
->r_flags
|= R_TPRINTFMSG
;
4640 lck_mtx_unlock(&nmp
->nm_lock
);
4642 if (req
->r_flags
& R_NOINTR
) {
4643 /* don't terminate nointr requests on timeout */
4644 lck_mtx_unlock(&req
->r_mtx
);
4647 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
4648 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
,
4649 now
.tv_sec
- req
->r_start
));
4651 finish_asyncio
= ((req
->r_callback
.rcb_func
!= NULL
) && !(req
->r_flags
& R_WAITSENT
));
4653 lck_mtx_unlock(&req
->r_mtx
);
4655 nfs_asyncio_finish(req
);
4659 /* for TCP, only resend if explicitly requested */
4660 if ((nmp
->nm_sotype
== SOCK_STREAM
) && !(req
->r_flags
& R_MUSTRESEND
)) {
4661 if (++req
->r_rexmit
> NFS_MAXREXMIT
)
4662 req
->r_rexmit
= NFS_MAXREXMIT
;
4664 lck_mtx_unlock(&req
->r_mtx
);
4669 * The request needs to be (re)sent. Kick the requester to resend it.
4670 * (unless it's already marked as needing a resend)
4672 if ((req
->r_flags
& R_MUSTRESEND
) && (req
->r_rtt
== -1)) {
4673 lck_mtx_unlock(&req
->r_mtx
);
4676 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
4677 req
->r_procnum
, req
->r_xid
, req
->r_flags
, req
->r_rtt
));
4678 req
->r_flags
|= R_MUSTRESEND
;
4681 if ((req
->r_flags
& (R_ASYNC
|R_ASYNCWAIT
|R_SENDING
)) == R_ASYNC
)
4682 nfs_asyncio_resend(req
);
4683 lck_mtx_unlock(&req
->r_mtx
);
4686 lck_mtx_unlock(nfs_request_mutex
);
4688 /* poke any sockets */
4689 while ((nmp
= TAILQ_FIRST(&nfs_mount_poke_queue
))) {
4690 TAILQ_REMOVE(&nfs_mount_poke_queue
, nmp
, nm_pokeq
);
4692 lck_mtx_lock(&nmp
->nm_lock
);
4693 nmp
->nm_sockflags
&= ~NMSOCK_POKE
;
4694 wakeup(&nmp
->nm_sockflags
);
4695 lck_mtx_unlock(&nmp
->nm_lock
);
4698 nfs_interval_timer_start(nfs_request_timer_call
, NFS_REQUESTDELAY
);
4702 * check a thread's proc for the "noremotehang" flag.
4705 nfs_noremotehang(thread_t thd
)
4707 proc_t p
= thd
? get_bsdthreadtask_info(thd
) : NULL
;
4708 return (p
&& proc_noremotehang(p
));
4712 * Test for a termination condition pending on the process.
4713 * This is used to determine if we need to bail on a mount.
4714 * ETIMEDOUT is returned if there has been a soft timeout.
4715 * EINTR is returned if there is a signal pending that is not being ignored
4716 * and the mount is interruptible, or if we are a thread that is in the process
4717 * of cancellation (also SIGKILL posted).
4719 extern int sigprop
[NSIG
+1];
4721 nfs_sigintr(struct nfsmount
*nmp
, struct nfsreq
*req
, thread_t thd
, int nmplocked
)
4729 if (req
&& (req
->r_flags
& R_SOFTTERM
))
4730 return (ETIMEDOUT
); /* request has been terminated. */
4731 if (req
&& (req
->r_flags
& R_NOINTR
))
4732 thd
= NULL
; /* don't check for signal on R_NOINTR */
4735 lck_mtx_lock(&nmp
->nm_lock
);
4736 if (nmp
->nm_state
& NFSSTA_FORCE
) {
4737 /* If a force unmount is in progress then fail. */
4739 } else if (nmp
->nm_mountp
->mnt_kern_flag
& MNTK_FRCUNMOUNT
) {
4740 /* Someone is unmounting us, go soft and mark it. */
4741 NFS_BITMAP_SET(nmp
->nm_flags
, NFS_MFLAG_SOFT
);
4742 nmp
->nm_state
|= NFSSTA_FORCE
;
4745 /* Check if the mount is marked dead. */
4746 if (!error
&& (nmp
->nm_state
& NFSSTA_DEAD
))
4750 * If the mount is hung and we've requested not to hang
4751 * on remote filesystems, then bail now.
4753 if (!error
&& (nmp
->nm_state
& NFSSTA_TIMEO
) && nfs_noremotehang(thd
))
4757 lck_mtx_unlock(&nmp
->nm_lock
);
4761 /* may not have a thread for async I/O */
4766 * Check if the process is aborted, but don't interrupt if we
4767 * were killed by a signal and this is the exiting thread which
4768 * is attempting to dump core.
4770 if (((p
= current_proc()) != kernproc
) && current_thread_aborted() &&
4771 (!(p
->p_acflag
& AXSIG
) || (p
->exit_thread
!= current_thread()) ||
4772 (p
->p_sigacts
== NULL
) ||
4773 (p
->p_sigacts
->ps_sig
< 1) || (p
->p_sigacts
->ps_sig
> NSIG
) ||
4774 !(sigprop
[p
->p_sigacts
->ps_sig
] & SA_CORE
)))
4777 /* mask off thread and process blocked signals. */
4778 if (NMFLAG(nmp
, INTR
) && ((p
= get_bsdthreadtask_info(thd
))) &&
4779 proc_pendingsignals(p
, NFSINT_SIGMASK
))
4785 * Lock a socket against others.
4786 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
4787 * and also to avoid race conditions between the processes with nfs requests
4788 * in progress when a reconnect is necessary.
4791 nfs_sndlock(struct nfsreq
*req
)
4793 struct nfsmount
*nmp
= req
->r_nmp
;
4795 int error
= 0, slpflag
= 0;
4796 struct timespec ts
= { 0, 0 };
4801 lck_mtx_lock(&nmp
->nm_lock
);
4802 statep
= &nmp
->nm_state
;
4804 if (NMFLAG(nmp
, INTR
) && req
->r_thread
&& !(req
->r_flags
& R_NOINTR
))
4806 while (*statep
& NFSSTA_SNDLOCK
) {
4807 if ((error
= nfs_sigintr(nmp
, req
, req
->r_thread
, 1)))
4809 *statep
|= NFSSTA_WANTSND
;
4810 if (nfs_noremotehang(req
->r_thread
))
4812 msleep(statep
, &nmp
->nm_lock
, slpflag
| (PZERO
- 1), "nfsndlck", &ts
);
4813 if (slpflag
== PCATCH
) {
4819 *statep
|= NFSSTA_SNDLOCK
;
4820 lck_mtx_unlock(&nmp
->nm_lock
);
4825 * Unlock the stream socket for others.
4828 nfs_sndunlock(struct nfsreq
*req
)
4830 struct nfsmount
*nmp
= req
->r_nmp
;
4831 int *statep
, wake
= 0;
4835 lck_mtx_lock(&nmp
->nm_lock
);
4836 statep
= &nmp
->nm_state
;
4837 if ((*statep
& NFSSTA_SNDLOCK
) == 0)
4838 panic("nfs sndunlock");
4839 *statep
&= ~(NFSSTA_SNDLOCK
|NFSSTA_SENDING
);
4840 if (*statep
& NFSSTA_WANTSND
) {
4841 *statep
&= ~NFSSTA_WANTSND
;
4844 lck_mtx_unlock(&nmp
->nm_lock
);
4851 struct nfsmount
*nmp
,
4853 struct sockaddr
*saddr
,
4860 struct nfsm_chain
*nmrep
)
4862 int error
= 0, on
= 1, try, sendat
= 2, soproto
, recv
, optlen
, restoreto
= 0;
4863 socket_t newso
= NULL
;
4864 struct sockaddr_storage ss
;
4865 struct timeval orig_rcvto
, orig_sndto
, tv
= { 1, 0 };
4866 mbuf_t m
, mrep
= NULL
;
4868 uint32_t rxid
= 0, reply
= 0, reply_status
, rejected_status
;
4869 uint32_t verf_type
, verf_len
, accepted_status
;
4870 size_t readlen
, sentlen
;
4871 struct nfs_rpc_record_state nrrs
;
4874 /* create socket and set options */
4875 soproto
= (sotype
== SOCK_DGRAM
) ? IPPROTO_UDP
: IPPROTO_TCP
;
4876 if ((error
= sock_socket(saddr
->sa_family
, sotype
, soproto
, NULL
, NULL
, &newso
)))
4880 int level
= (saddr
->sa_family
== AF_INET
) ? IPPROTO_IP
: IPPROTO_IPV6
;
4881 int optname
= (saddr
->sa_family
== AF_INET
) ? IP_PORTRANGE
: IPV6_PORTRANGE
;
4882 int portrange
= IP_PORTRANGE_LOW
;
4883 error
= sock_setsockopt(newso
, level
, optname
, &portrange
, sizeof(portrange
));
4885 ss
.ss_len
= saddr
->sa_len
;
4886 ss
.ss_family
= saddr
->sa_family
;
4887 if (ss
.ss_family
== AF_INET
) {
4888 ((struct sockaddr_in
*)&ss
)->sin_addr
.s_addr
= INADDR_ANY
;
4889 ((struct sockaddr_in
*)&ss
)->sin_port
= htons(0);
4890 } else if (ss
.ss_family
== AF_INET6
) {
4891 ((struct sockaddr_in6
*)&ss
)->sin6_addr
= in6addr_any
;
4892 ((struct sockaddr_in6
*)&ss
)->sin6_port
= htons(0);
4897 error
= sock_bind(newso
, (struct sockaddr
*)&ss
);
4901 if (sotype
== SOCK_STREAM
) {
4902 on
= 4; /* don't wait too long for the socket to connect */
4903 sock_setsockopt(newso
, IPPROTO_TCP
, TCP_CONNECTIONTIMEOUT
, &on
, sizeof(on
));
4904 error
= sock_connect(newso
, saddr
, 0);
4907 if (((error
= sock_setsockopt(newso
, SOL_SOCKET
, SO_RCVTIMEO
, &tv
, sizeof(tv
)))) ||
4908 ((error
= sock_setsockopt(newso
, SOL_SOCKET
, SO_SNDTIMEO
, &tv
, sizeof(tv
)))) ||
4909 ((error
= sock_setsockopt(newso
, SOL_SOCKET
, SO_NOADDRERR
, &on
, sizeof(on
)))))
4913 /* make sure socket is using a one second timeout in this function */
4914 optlen
= sizeof(orig_rcvto
);
4915 error
= sock_getsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &orig_rcvto
, &optlen
);
4917 optlen
= sizeof(orig_sndto
);
4918 error
= sock_getsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &orig_sndto
, &optlen
);
4921 sock_setsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &tv
, sizeof(tv
));
4922 sock_setsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &tv
, sizeof(tv
));
4927 if (sotype
== SOCK_STREAM
) {
4928 sendat
= 0; /* we only resend the request for UDP */
4929 nfs_rpc_record_state_init(&nrrs
);
4932 for (try=0; try < timeo
; try++) {
4933 if ((error
= nfs_sigintr(nmp
, NULL
, !try ? NULL
: thd
, 0)))
4935 if (!try || (try == sendat
)) {
4936 /* send the request (resending periodically for UDP) */
4937 if ((error
= mbuf_copym(mreq
, 0, MBUF_COPYALL
, MBUF_WAITOK
, &m
)))
4939 bzero(&msg
, sizeof(msg
));
4940 if ((sotype
== SOCK_DGRAM
) && !sock_isconnected(so
)) {
4941 msg
.msg_name
= saddr
;
4942 msg
.msg_namelen
= saddr
->sa_len
;
4944 if ((error
= sock_sendmbuf(so
, &msg
, m
, 0, &sentlen
)))
4950 /* wait for the response */
4951 if (sotype
== SOCK_STREAM
) {
4952 /* try to read (more of) record */
4953 error
= nfs_rpc_record_read(so
, &nrrs
, 0, &recv
, &mrep
);
4954 /* if we don't have the whole record yet, we'll keep trying */
4957 bzero(&msg
, sizeof(msg
));
4958 error
= sock_receivembuf(so
, &msg
, &mrep
, 0, &readlen
);
4960 if (error
== EWOULDBLOCK
)
4963 /* parse the response */
4964 nfsm_chain_dissect_init(error
, nmrep
, mrep
);
4965 nfsm_chain_get_32(error
, nmrep
, rxid
);
4966 nfsm_chain_get_32(error
, nmrep
, reply
);
4968 if ((rxid
!= xid
) || (reply
!= RPC_REPLY
))
4970 nfsm_chain_get_32(error
, nmrep
, reply_status
);
4972 if (reply_status
== RPC_MSGDENIED
) {
4973 nfsm_chain_get_32(error
, nmrep
, rejected_status
);
4975 error
= (rejected_status
== RPC_MISMATCH
) ? ERPCMISMATCH
: EACCES
;
4978 nfsm_chain_get_32(error
, nmrep
, verf_type
); /* verifier flavor */
4979 nfsm_chain_get_32(error
, nmrep
, verf_len
); /* verifier length */
4982 nfsm_chain_adv(error
, nmrep
, nfsm_rndup(verf_len
));
4983 nfsm_chain_get_32(error
, nmrep
, accepted_status
);
4985 switch (accepted_status
) {
4989 case RPC_PROGUNAVAIL
:
4990 error
= EPROGUNAVAIL
;
4992 case RPC_PROGMISMATCH
:
4993 error
= EPROGMISMATCH
;
4995 case RPC_PROCUNAVAIL
:
4996 error
= EPROCUNAVAIL
;
5001 case RPC_SYSTEM_ERR
:
5010 sock_setsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &orig_rcvto
, sizeof(tv
));
5011 sock_setsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &orig_sndto
, sizeof(tv
));
5014 sock_shutdown(newso
, SHUT_RDWR
);
5023 struct nfsmount
*nmp
,
5025 struct sockaddr
*sa
,
5032 thread_t thd
= vfs_context_thread(ctx
);
5033 kauth_cred_t cred
= vfs_context_ucred(ctx
);
5034 struct sockaddr_storage ss
;
5035 struct sockaddr
*saddr
= (struct sockaddr
*)&ss
;
5036 struct nfsm_chain nmreq
, nmrep
;
5038 int error
= 0, ip
, pmprog
, pmvers
, pmproc
, ualen
= 0;
5041 char uaddr
[MAX_IPv6_STR_LEN
+16];
5043 bcopy(sa
, saddr
, min(sizeof(ss
), sa
->sa_len
));
5044 if (saddr
->sa_family
== AF_INET
) {
5048 pmproc
= PMAPPROC_GETPORT
;
5049 } else if (saddr
->sa_family
== AF_INET6
) {
5053 pmproc
= RPCBPROC_GETVERSADDR
;
5057 nfsm_chain_null(&nmreq
);
5058 nfsm_chain_null(&nmrep
);
5061 /* send portmapper request to get port/uaddr */
5063 ((struct sockaddr_in
*)saddr
)->sin_port
= htons(PMAPPORT
);
5065 ((struct sockaddr_in6
*)saddr
)->sin6_port
= htons(PMAPPORT
);
5066 nfsm_chain_build_alloc_init(error
, &nmreq
, 8*NFSX_UNSIGNED
);
5067 nfsm_chain_add_32(error
, &nmreq
, protocol
);
5068 nfsm_chain_add_32(error
, &nmreq
, vers
);
5070 nfsm_chain_add_32(error
, &nmreq
, ipproto
);
5071 nfsm_chain_add_32(error
, &nmreq
, 0);
5073 if (ipproto
== IPPROTO_TCP
)
5074 nfsm_chain_add_string(error
, &nmreq
, "tcp6", 4);
5076 nfsm_chain_add_string(error
, &nmreq
, "udp6", 4);
5077 nfsm_chain_add_string(error
, &nmreq
, "", 0); /* uaddr */
5078 nfsm_chain_add_string(error
, &nmreq
, "", 0); /* owner */
5080 nfsm_chain_build_done(error
, &nmreq
);
5082 error
= nfsm_rpchead2(nmp
, (ipproto
== IPPROTO_UDP
) ? SOCK_DGRAM
: SOCK_STREAM
,
5083 pmprog
, pmvers
, pmproc
, RPCAUTH_SYS
, cred
, NULL
, nmreq
.nmc_mhead
,
5086 nmreq
.nmc_mhead
= NULL
;
5087 error
= nfs_aux_request(nmp
, thd
, saddr
, so
, (ipproto
== IPPROTO_UDP
) ? SOCK_DGRAM
: SOCK_STREAM
,
5088 mreq
, R_XID32(xid
), 0, timeo
, &nmrep
);
5090 /* grab port from portmap response */
5092 nfsm_chain_get_32(error
, &nmrep
, port
);
5094 ((struct sockaddr_in
*)sa
)->sin_port
= htons(port
);
5096 /* get uaddr string and convert to sockaddr */
5097 nfsm_chain_get_32(error
, &nmrep
, ualen
);
5099 if (ualen
> ((int)sizeof(uaddr
)-1))
5102 /* program is not available, just return a zero port */
5103 bcopy(sa
, saddr
, min(sizeof(ss
), sa
->sa_len
));
5104 ((struct sockaddr_in6
*)saddr
)->sin6_port
= htons(0);
5106 nfsm_chain_get_opaque(error
, &nmrep
, ualen
, uaddr
);
5108 uaddr
[ualen
] = '\0';
5109 if (!nfs_uaddr2sockaddr(uaddr
, saddr
))
5114 if ((error
== EPROGMISMATCH
) || (error
== EPROCUNAVAIL
) || (error
== EIO
) || (error
== EBADRPC
)) {
5115 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
5116 if (pmvers
== RPCBVERS4
) {
5117 /* fall back to v3 and GETADDR */
5119 pmproc
= RPCBPROC_GETADDR
;
5120 nfsm_chain_cleanup(&nmreq
);
5121 nfsm_chain_cleanup(&nmrep
);
5122 bcopy(sa
, saddr
, min(sizeof(ss
), sa
->sa_len
));
5129 bcopy(saddr
, sa
, min(saddr
->sa_len
, sa
->sa_len
));
5132 nfsm_chain_cleanup(&nmreq
);
5133 nfsm_chain_cleanup(&nmrep
);
5138 nfs_msg(thread_t thd
,
5143 proc_t p
= thd
? get_bsdthreadtask_info(thd
) : NULL
;
5147 tpr
= tprintf_open(p
);
5151 tprintf(tpr
, "nfs server %s: %s, error %d\n", server
, msg
, error
);
5153 tprintf(tpr
, "nfs server %s: %s\n", server
, msg
);
5158 #define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */
5159 #define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if they are automounted */
5160 #define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */
5161 #define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */
5162 #define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */
/* Squish policy bits (NFS_SQUISH_*); initial value: mobile-only, automounted-only, quick. */
5164 uint32_t nfs_squishy_flags
= NFS_SQUISH_MOBILE_ONLY
| NFS_SQUISH_AUTOMOUNTED_ONLY
| NFS_SQUISH_QUICK
;
/* Mobility state tested by nfs_can_squish(); 0 is treated there as "not mobile". */
5165 int32_t nfs_is_mobile
;
5167 #define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead timeout for squishy mounts */
5168 #define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead timeout when the nfs_squishy_flags NFS_SQUISH_QUICK bit is set */
5171 * Could this mount be squished?
5174 nfs_can_squish(struct nfsmount
*nmp
)
5176 uint64_t flags
= vfs_flags(nmp
->nm_mountp
);
5177 int softsquish
= ((nfs_squishy_flags
& NFS_SQUISH_SOFT
) & NMFLAG(nmp
, SOFT
));
5179 if (!softsquish
&& (nfs_squishy_flags
& NFS_SQUISH_MOBILE_ONLY
) && nfs_is_mobile
== 0)
5182 if ((nfs_squishy_flags
& NFS_SQUISH_AUTOMOUNTED_ONLY
) && (flags
& MNT_AUTOMOUNTED
) == 0)
5189 * NFS mounts default to "rw,hard" - but frequently on mobile clients
5190 * the mount may become "not responding". It's desirable to be able
5191 * to unmount these dead mounts, but only if there is no risk of
5192 * losing data or crashing applications. A "squishy" NFS mount is one
5193 * that can be force unmounted with little risk of harm.
5195 * nfs_is_squishy checks if a mount is in a squishy state. A mount is
5196 * in a squishy state iff it is allowed to be squishy and there are no
5197 * dirty pages and there are no mmapped files and there are no files
5198 * open for write. Whether mounts are allowed to be squishy is controlled by
5199 * the settings of the nfs_squishy_flags and its mobility state. These
5200 * flags can be set by sysctls.
5202 * If nfs_is_squishy determines that we are in a squishy state we will
5203 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT
5204 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see
5205 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just
5206 * update the current dead timeout with the mount's nm_deadtimeout
5207 * value set at mount time.
5209 * Assumes that nm_lock is held.
5211 * Note this routine is racy, but its effects on setting the
5212 * dead timeout only have effects when we're in trouble and are likely
5213 * to stay that way. Since by default it's only for automounted
5214 * volumes on mobile machines; this is a reasonable trade off between
5215 * data integrity and user experience. It can be disabled or set via
5220 nfs_is_squishy(struct nfsmount
*nmp
)
5222 mount_t mp
= nmp
->nm_mountp
;
5224 int timeo
= (nfs_squishy_flags
& NFS_SQUISH_QUICK
) ? NFS_SQUISHY_QUICKTIMEOUT
: NFS_SQUISHY_DEADTIMEOUT
;
5226 NFS_SOCK_DBG(("nfs_is_squishy: %s: nm_curdeadtiemout = %d, nfs_is_mobile = %d\n",
5227 vfs_statfs(mp
)->f_mntfromname
, nmp
->nm_curdeadtimeout
, nfs_is_mobile
));
5229 if (!nfs_can_squish(nmp
))
5232 timeo
= (nmp
->nm_deadtimeout
> timeo
) ? max(nmp
->nm_deadtimeout
/8, timeo
) : timeo
;
5233 NFS_SOCK_DBG(("nfs_is_squishy: nm_writers = %d nm_mappers = %d timeo = %d\n", nmp
->nm_writers
, nmp
->nm_mappers
, timeo
));
5235 if (nmp
->nm_writers
== 0 && nmp
->nm_mappers
== 0) {
5236 uint64_t flags
= mp
? vfs_flags(mp
) : 0;
5240 * Walk the nfs nodes and check for dirty buffers if we're not
5241 * RDONLY and we've not already been declared as squishy since
5242 * this can be a bit expensive.
5244 if (!(flags
& MNT_RDONLY
) && !(nmp
->nm_state
& NFSSTA_SQUISHY
))
5245 squishy
= !nfs_mount_is_dirty(mp
);
5250 nmp
->nm_state
|= NFSSTA_SQUISHY
;
5252 nmp
->nm_state
&= ~NFSSTA_SQUISHY
;
5254 nmp
->nm_curdeadtimeout
= squishy
? timeo
: nmp
->nm_deadtimeout
;
5256 NFS_SOCK_DBG(("nfs_is_squishy: nm_curdeadtimeout = %d\n", nmp
->nm_curdeadtimeout
));
5262 * On a send operation, if we can't reach the server and we've got only one server to talk to
5263 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead
5264 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise.
5267 nfs_is_dead_lock(int error
, struct nfsmount
*nmp
)
5269 if (nmp
->nm_state
& NFSSTA_DEAD
)
5272 if ((error
!= ENETUNREACH
&& error
!= EHOSTUNREACH
) ||
5273 !(nmp
->nm_locations
.nl_numlocs
== 1 && nmp
->nm_locations
.nl_locations
[0]->nl_servcount
== 1))
5275 if ((nfs_squishy_flags
& NFS_SQUISH_QUICK
) && nfs_is_squishy(nmp
)) {
5276 printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
);
5277 nmp
->nm_state
|= NFSSTA_DEAD
;
5278 vfs_event_signal(&vfs_statfs(nmp
->nm_mountp
)->f_fsid
, VQ_DEAD
, 0);
5285 nfs_is_dead(int error
, struct nfsmount
*nmp
)
5289 lck_mtx_lock(&nmp
->nm_lock
);
5290 is_dead
= nfs_is_dead_lock(error
, nmp
);
5291 lck_mtx_unlock(&nmp
->nm_lock
);
5297 nfs_down(struct nfsmount
*nmp
, thread_t thd
, int error
, int flags
, const char *msg
)
5299 int timeoutmask
, wasunresponsive
, unresponsive
, softnobrowse
;
5300 uint32_t do_vfs_signal
;
5306 lck_mtx_lock(&nmp
->nm_lock
);
5308 timeoutmask
= NFSSTA_TIMEO
| NFSSTA_LOCKTIMEO
| NFSSTA_JUKEBOXTIMEO
;
5309 if (NMFLAG(nmp
, MUTEJUKEBOX
)) /* jukebox timeouts don't count as unresponsive if muted */
5310 timeoutmask
&= ~NFSSTA_JUKEBOXTIMEO
;
5311 wasunresponsive
= (nmp
->nm_state
& timeoutmask
);
5313 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5314 softnobrowse
= (NMFLAG(nmp
, SOFT
) && (vfs_flags(nmp
->nm_mountp
) & MNT_DONTBROWSE
));
5316 if ((flags
& NFSSTA_TIMEO
) && !(nmp
->nm_state
& NFSSTA_TIMEO
))
5317 nmp
->nm_state
|= NFSSTA_TIMEO
;
5318 if ((flags
& NFSSTA_LOCKTIMEO
) && !(nmp
->nm_state
& NFSSTA_LOCKTIMEO
))
5319 nmp
->nm_state
|= NFSSTA_LOCKTIMEO
;
5320 if ((flags
& NFSSTA_JUKEBOXTIMEO
) && !(nmp
->nm_state
& NFSSTA_JUKEBOXTIMEO
))
5321 nmp
->nm_state
|= NFSSTA_JUKEBOXTIMEO
;
5323 unresponsive
= (nmp
->nm_state
& timeoutmask
);
5325 nfs_is_squishy(nmp
);
5327 if (unresponsive
&& (nmp
->nm_curdeadtimeout
> 0)) {
5329 if (!wasunresponsive
) {
5330 nmp
->nm_deadto_start
= now
.tv_sec
;
5331 nfs_mount_sock_thread_wake(nmp
);
5332 } else if ((now
.tv_sec
- nmp
->nm_deadto_start
) > nmp
->nm_curdeadtimeout
) {
5333 if (!(nmp
->nm_state
& NFSSTA_DEAD
))
5334 printf("nfs server %s: %sdead\n", vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
,
5335 (nmp
->nm_curdeadtimeout
!= nmp
->nm_deadtimeout
) ? "squished " : "");
5336 nmp
->nm_state
|= NFSSTA_DEAD
;
5339 lck_mtx_unlock(&nmp
->nm_lock
);
5341 if (nmp
->nm_state
& NFSSTA_DEAD
)
5342 do_vfs_signal
= VQ_DEAD
;
5343 else if (softnobrowse
|| wasunresponsive
|| !unresponsive
)
5346 do_vfs_signal
= VQ_NOTRESP
;
5348 vfs_event_signal(&vfs_statfs(nmp
->nm_mountp
)->f_fsid
, do_vfs_signal
, 0);
5350 nfs_msg(thd
, vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, msg
, error
);
5354 nfs_up(struct nfsmount
*nmp
, thread_t thd
, int flags
, const char *msg
)
5356 int timeoutmask
, wasunresponsive
, unresponsive
, softnobrowse
;
5363 nfs_msg(thd
, vfs_statfs(nmp
->nm_mountp
)->f_mntfromname
, msg
, 0);
5365 lck_mtx_lock(&nmp
->nm_lock
);
5367 timeoutmask
= NFSSTA_TIMEO
| NFSSTA_LOCKTIMEO
| NFSSTA_JUKEBOXTIMEO
;
5368 if (NMFLAG(nmp
, MUTEJUKEBOX
)) /* jukebox timeouts don't count as unresponsive if muted */
5369 timeoutmask
&= ~NFSSTA_JUKEBOXTIMEO
;
5370 wasunresponsive
= (nmp
->nm_state
& timeoutmask
);
5372 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5373 softnobrowse
= (NMFLAG(nmp
, SOFT
) && (vfs_flags(nmp
->nm_mountp
) & MNT_DONTBROWSE
));
5375 if ((flags
& NFSSTA_TIMEO
) && (nmp
->nm_state
& NFSSTA_TIMEO
))
5376 nmp
->nm_state
&= ~NFSSTA_TIMEO
;
5377 if ((flags
& NFSSTA_LOCKTIMEO
) && (nmp
->nm_state
& NFSSTA_LOCKTIMEO
))
5378 nmp
->nm_state
&= ~NFSSTA_LOCKTIMEO
;
5379 if ((flags
& NFSSTA_JUKEBOXTIMEO
) && (nmp
->nm_state
& NFSSTA_JUKEBOXTIMEO
))
5380 nmp
->nm_state
&= ~NFSSTA_JUKEBOXTIMEO
;
5382 unresponsive
= (nmp
->nm_state
& timeoutmask
);
5384 nmp
->nm_deadto_start
= 0;
5385 nmp
->nm_curdeadtimeout
= nmp
->nm_deadtimeout
;
5386 nmp
->nm_state
&= ~NFSSTA_SQUISHY
;
5387 lck_mtx_unlock(&nmp
->nm_lock
);
5392 do_vfs_signal
= (wasunresponsive
&& !unresponsive
);
5394 vfs_event_signal(&vfs_statfs(nmp
->nm_mountp
)->f_fsid
, VQ_NOTRESP
, 1);
5398 #endif /* NFSCLIENT */
5403 * Generate the rpc reply header
5404 * siz arg. is used to decide if adding a cluster is worthwhile
5408 struct nfsrv_descript
*nd
,
5409 __unused
struct nfsrv_sock
*slp
,
5410 struct nfsm_chain
*nmrepp
,
5415 struct nfsm_chain nmrep
;
5418 err
= nd
->nd_repstat
;
5419 if (err
&& (nd
->nd_vers
== NFS_VER2
))
5423 * If this is a big reply, use a cluster else
5424 * try and leave leading space for the lower level headers.
5426 siz
+= RPC_REPLYSIZ
;
5427 if (siz
>= nfs_mbuf_minclsize
) {
5428 error
= mbuf_getpacket(MBUF_WAITOK
, &mrep
);
5430 error
= mbuf_gethdr(MBUF_WAITOK
, MBUF_TYPE_DATA
, &mrep
);
5433 /* unable to allocate packet */
5434 /* XXX should we keep statistics for these errors? */
5437 if (siz
< nfs_mbuf_minclsize
) {
5438 /* leave space for lower level headers */
5439 tl
= mbuf_data(mrep
);
5440 tl
+= 80/sizeof(*tl
); /* XXX max_hdr? XXX */
5441 mbuf_setdata(mrep
, tl
, 6 * NFSX_UNSIGNED
);
5443 nfsm_chain_init(&nmrep
, mrep
);
5444 nfsm_chain_add_32(error
, &nmrep
, nd
->nd_retxid
);
5445 nfsm_chain_add_32(error
, &nmrep
, RPC_REPLY
);
5446 if (err
== ERPCMISMATCH
|| (err
& NFSERR_AUTHERR
)) {
5447 nfsm_chain_add_32(error
, &nmrep
, RPC_MSGDENIED
);
5448 if (err
& NFSERR_AUTHERR
) {
5449 nfsm_chain_add_32(error
, &nmrep
, RPC_AUTHERR
);
5450 nfsm_chain_add_32(error
, &nmrep
, (err
& ~NFSERR_AUTHERR
));
5452 nfsm_chain_add_32(error
, &nmrep
, RPC_MISMATCH
);
5453 nfsm_chain_add_32(error
, &nmrep
, RPC_VER2
);
5454 nfsm_chain_add_32(error
, &nmrep
, RPC_VER2
);
5458 nfsm_chain_add_32(error
, &nmrep
, RPC_MSGACCEPTED
);
5459 if (nd
->nd_gss_context
!= NULL
) {
5460 /* RPCSEC_GSS verifier */
5461 error
= nfs_gss_svc_verf_put(nd
, &nmrep
);
5463 nfsm_chain_add_32(error
, &nmrep
, RPC_SYSTEM_ERR
);
5467 /* RPCAUTH_NULL verifier */
5468 nfsm_chain_add_32(error
, &nmrep
, RPCAUTH_NULL
);
5469 nfsm_chain_add_32(error
, &nmrep
, 0);
5471 /* accepted status */
5474 nfsm_chain_add_32(error
, &nmrep
, RPC_PROGUNAVAIL
);
5477 nfsm_chain_add_32(error
, &nmrep
, RPC_PROGMISMATCH
);
5478 /* XXX hard coded versions? */
5479 nfsm_chain_add_32(error
, &nmrep
, NFS_VER2
);
5480 nfsm_chain_add_32(error
, &nmrep
, NFS_VER3
);
5483 nfsm_chain_add_32(error
, &nmrep
, RPC_PROCUNAVAIL
);
5486 nfsm_chain_add_32(error
, &nmrep
, RPC_GARBAGE
);
5489 nfsm_chain_add_32(error
, &nmrep
, RPC_SUCCESS
);
5490 if (nd
->nd_gss_context
!= NULL
)
5491 error
= nfs_gss_svc_prepare_reply(nd
, &nmrep
);
5492 if (err
!= NFSERR_RETVOID
)
5493 nfsm_chain_add_32(error
, &nmrep
,
5494 (err
? nfsrv_errmap(nd
, err
) : 0));
5500 nfsm_chain_build_done(error
, &nmrep
);
5502 /* error composing reply header */
5503 /* XXX should we keep statistics for these errors? */
5509 if ((err
!= 0) && (err
!= NFSERR_RETVOID
))
5510 OSAddAtomic64(1, &nfsstats
.srvrpc_errs
);
5515 * The nfs server send routine.
5517 * - return EINTR or ERESTART if interrupted by a signal
5518 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
5519 * - do any cleanup required by recoverable socket errors (???)
5522 nfsrv_send(struct nfsrv_sock
*slp
, mbuf_t nam
, mbuf_t top
)
5525 socket_t so
= slp
->ns_so
;
5526 struct sockaddr
*sendnam
;
5529 bzero(&msg
, sizeof(msg
));
5530 if (nam
&& !sock_isconnected(so
) && (slp
->ns_sotype
!= SOCK_STREAM
)) {
5531 if ((sendnam
= mbuf_data(nam
))) {
5532 msg
.msg_name
= (caddr_t
)sendnam
;
5533 msg
.msg_namelen
= sendnam
->sa_len
;
5536 error
= sock_sendmbuf(so
, &msg
, top
, 0, NULL
);
5539 log(LOG_INFO
, "nfsd send error %d\n", error
);
5541 if ((error
== EWOULDBLOCK
) && (slp
->ns_sotype
== SOCK_STREAM
))
5542 error
= EPIPE
; /* zap TCP sockets if they time out on send */
5544 /* Handle any recoverable (soft) socket errors here. (???) */
5545 if (error
!= EINTR
&& error
!= ERESTART
&& error
!= EIO
&&
5546 error
!= EWOULDBLOCK
&& error
!= EPIPE
)
5553 * Socket upcall routine for the nfsd sockets.
5554 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
5555 * Essentially do as much as possible non-blocking, else punt and it will
5556 * be called with MBUF_WAITOK from an nfsd.
5559 nfsrv_rcv(socket_t so
, void *arg
, int waitflag
)
5561 struct nfsrv_sock
*slp
= arg
;
5563 if (!nfsd_thread_count
|| !(slp
->ns_flag
& SLP_VALID
))
5566 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
5567 nfsrv_rcv_locked(so
, slp
, waitflag
);
5568 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
5571 nfsrv_rcv_locked(socket_t so
, struct nfsrv_sock
*slp
, int waitflag
)
5573 mbuf_t m
, mp
, mhck
, m2
;
5574 int ns_flag
=0, error
;
5578 if ((slp
->ns_flag
& SLP_VALID
) == 0) {
5579 if (waitflag
== MBUF_DONTWAIT
)
5580 lck_rw_done(&slp
->ns_rwlock
);
5586 * Define this to test for nfsds handling this under heavy load.
5588 if (waitflag
== MBUF_DONTWAIT
) {
5589 ns_flag
= SLP_NEEDQ
;
5593 if (slp
->ns_sotype
== SOCK_STREAM
) {
5595 * If there are already records on the queue, defer soreceive()
5596 * to an(other) nfsd so that there is feedback to the TCP layer that
5597 * the nfs servers are heavily loaded.
5600 ns_flag
= SLP_NEEDQ
;
5607 bytes_read
= 1000000000;
5608 error
= sock_receivembuf(so
, NULL
, &mp
, MSG_DONTWAIT
, &bytes_read
);
5609 if (error
|| mp
== NULL
) {
5610 if (error
== EWOULDBLOCK
)
5611 ns_flag
= (waitflag
== MBUF_DONTWAIT
) ? SLP_NEEDQ
: 0;
5613 ns_flag
= SLP_DISCONN
;
5617 if (slp
->ns_rawend
) {
5618 if ((error
= mbuf_setnext(slp
->ns_rawend
, m
)))
5619 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error
);
5620 slp
->ns_cc
+= bytes_read
;
5623 slp
->ns_cc
= bytes_read
;
5625 while ((m2
= mbuf_next(m
)))
5630 * Now try and parse record(s) out of the raw stream data.
5632 error
= nfsrv_getstream(slp
, waitflag
);
5635 ns_flag
= SLP_DISCONN
;
5637 ns_flag
= SLP_NEEDQ
;
5640 struct sockaddr_storage nam
;
5642 if (slp
->ns_reccnt
>= nfsrv_sock_max_rec_queue_length
) {
5643 /* already have max # RPC records queued on this socket */
5644 ns_flag
= SLP_NEEDQ
;
5648 bzero(&msg
, sizeof(msg
));
5649 msg
.msg_name
= (caddr_t
)&nam
;
5650 msg
.msg_namelen
= sizeof(nam
);
5653 bytes_read
= 1000000000;
5654 error
= sock_receivembuf(so
, &msg
, &mp
, MSG_DONTWAIT
| MSG_NEEDSA
, &bytes_read
);
5656 if (msg
.msg_name
&& (mbuf_get(MBUF_WAITOK
, MBUF_TYPE_SONAME
, &mhck
) == 0)) {
5657 mbuf_setlen(mhck
, nam
.ss_len
);
5658 bcopy(&nam
, mbuf_data(mhck
), nam
.ss_len
);
5660 if (mbuf_setnext(m
, mp
)) {
5661 /* trouble... just drop it */
5662 printf("nfsrv_rcv: mbuf_setnext failed\n");
5670 mbuf_setnextpkt(slp
->ns_recend
, m
);
5673 slp
->ns_flag
|= SLP_DOREC
;
5676 mbuf_setnextpkt(m
, NULL
);
5683 * Now try and process the request records, non-blocking.
5687 slp
->ns_flag
|= ns_flag
;
5688 if (waitflag
== MBUF_DONTWAIT
) {
5689 int wake
= (slp
->ns_flag
& SLP_WORKTODO
);
5690 lck_rw_done(&slp
->ns_rwlock
);
5691 if (wake
&& nfsd_thread_count
) {
5692 lck_mtx_lock(nfsd_mutex
);
5693 nfsrv_wakenfsd(slp
);
5694 lck_mtx_unlock(nfsd_mutex
);
5700 * Try and extract an RPC request from the mbuf data list received on a
5701 * stream socket. The "waitflag" argument indicates whether or not it
5705 nfsrv_getstream(struct nfsrv_sock
*slp
, int waitflag
)
5708 char *cp1
, *cp2
, *mdata
;
5709 int len
, mlen
, error
;
5710 mbuf_t om
, m2
, recm
;
5713 if (slp
->ns_flag
& SLP_GETSTREAM
)
5714 panic("nfs getstream");
5715 slp
->ns_flag
|= SLP_GETSTREAM
;
5717 if (slp
->ns_reclen
== 0) {
5718 if (slp
->ns_cc
< NFSX_UNSIGNED
) {
5719 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5723 mdata
= mbuf_data(m
);
5725 if (mlen
>= NFSX_UNSIGNED
) {
5726 bcopy(mdata
, (caddr_t
)&recmark
, NFSX_UNSIGNED
);
5727 mdata
+= NFSX_UNSIGNED
;
5728 mlen
-= NFSX_UNSIGNED
;
5729 mbuf_setdata(m
, mdata
, mlen
);
5731 cp1
= (caddr_t
)&recmark
;
5733 while (cp1
< ((caddr_t
)&recmark
) + NFSX_UNSIGNED
) {
5741 mbuf_setdata(m
, cp2
, mlen
);
5744 slp
->ns_cc
-= NFSX_UNSIGNED
;
5745 recmark
= ntohl(recmark
);
5746 slp
->ns_reclen
= recmark
& ~0x80000000;
5747 if (recmark
& 0x80000000)
5748 slp
->ns_flag
|= SLP_LASTFRAG
;
5750 slp
->ns_flag
&= ~SLP_LASTFRAG
;
5751 if (slp
->ns_reclen
<= 0 || slp
->ns_reclen
> NFS_MAXPACKET
) {
5752 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5758 * Now get the record part.
5760 * Note that slp->ns_reclen may be 0. Linux sometimes
5761 * generates 0-length RPCs
5764 if (slp
->ns_cc
== slp
->ns_reclen
) {
5766 slp
->ns_raw
= slp
->ns_rawend
= NULL
;
5767 slp
->ns_cc
= slp
->ns_reclen
= 0;
5768 } else if (slp
->ns_cc
> slp
->ns_reclen
) {
5772 mdata
= mbuf_data(m
);
5774 while (len
< slp
->ns_reclen
) {
5775 if ((len
+ mlen
) > slp
->ns_reclen
) {
5776 if (mbuf_copym(m
, 0, slp
->ns_reclen
- len
, waitflag
, &m2
)) {
5777 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5778 return (EWOULDBLOCK
);
5781 if (mbuf_setnext(om
, m2
)) {
5782 /* trouble... just drop it */
5783 printf("nfsrv_getstream: mbuf_setnext failed\n");
5785 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5786 return (EWOULDBLOCK
);
5792 mdata
+= slp
->ns_reclen
- len
;
5793 mlen
-= slp
->ns_reclen
- len
;
5794 mbuf_setdata(m
, mdata
, mlen
);
5795 len
= slp
->ns_reclen
;
5796 } else if ((len
+ mlen
) == slp
->ns_reclen
) {
5801 if (mbuf_setnext(om
, NULL
)) {
5802 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
5803 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5804 return (EWOULDBLOCK
);
5807 mdata
= mbuf_data(m
);
5813 mdata
= mbuf_data(m
);
5820 slp
->ns_flag
&= ~SLP_GETSTREAM
;
5825 * Accumulate the fragments into a record.
5827 if (slp
->ns_frag
== NULL
) {
5828 slp
->ns_frag
= recm
;
5831 while ((m2
= mbuf_next(m
)))
5833 if ((error
= mbuf_setnext(m
, recm
)))
5834 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error
);
5836 if (slp
->ns_flag
& SLP_LASTFRAG
) {
5838 mbuf_setnextpkt(slp
->ns_recend
, slp
->ns_frag
);
5840 slp
->ns_rec
= slp
->ns_frag
;
5841 slp
->ns_flag
|= SLP_DOREC
;
5843 slp
->ns_recend
= slp
->ns_frag
;
5844 slp
->ns_frag
= NULL
;
5850 * Parse an RPC header.
5854 struct nfsrv_sock
*slp
,
5856 struct nfsrv_descript
**ndp
)
5860 struct nfsrv_descript
*nd
;
5864 if (!(slp
->ns_flag
& (SLP_VALID
|SLP_DOREC
)) || (slp
->ns_rec
== NULL
))
5866 MALLOC_ZONE(nd
, struct nfsrv_descript
*,
5867 sizeof (struct nfsrv_descript
), M_NFSRVDESC
, M_WAITOK
);
5871 slp
->ns_rec
= mbuf_nextpkt(m
);
5873 mbuf_setnextpkt(m
, NULL
);
5875 slp
->ns_flag
&= ~SLP_DOREC
;
5876 slp
->ns_recend
= NULL
;
5879 if (mbuf_type(m
) == MBUF_TYPE_SONAME
) {
5882 if ((error
= mbuf_setnext(nam
, NULL
)))
5883 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error
);
5887 nfsm_chain_dissect_init(error
, &nd
->nd_nmreq
, m
);
5889 error
= nfsrv_getreq(nd
);
5893 if (nd
->nd_gss_context
)
5894 nfs_gss_svc_ctx_deref(nd
->nd_gss_context
);
5895 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
5905 * Parse an RPC request
5907 * - fill in the cred struct.
5910 nfsrv_getreq(struct nfsrv_descript
*nd
)
5912 struct nfsm_chain
*nmreq
;
5914 u_int32_t nfsvers
, auth_type
;
5922 nd
->nd_gss_context
= NULL
;
5923 nd
->nd_gss_seqnum
= 0;
5924 nd
->nd_gss_mb
= NULL
;
5926 user_id
= group_id
= -2;
5927 val
= auth_type
= len
= 0;
5929 nmreq
= &nd
->nd_nmreq
;
5930 nfsm_chain_get_32(error
, nmreq
, nd
->nd_retxid
); // XID
5931 nfsm_chain_get_32(error
, nmreq
, val
); // RPC Call
5932 if (!error
&& (val
!= RPC_CALL
))
5936 nfsm_chain_get_32(error
, nmreq
, val
); // RPC Version
5938 if (val
!= RPC_VER2
) {
5939 nd
->nd_repstat
= ERPCMISMATCH
;
5940 nd
->nd_procnum
= NFSPROC_NOOP
;
5943 nfsm_chain_get_32(error
, nmreq
, val
); // RPC Program Number
5945 if (val
!= NFS_PROG
) {
5946 nd
->nd_repstat
= EPROGUNAVAIL
;
5947 nd
->nd_procnum
= NFSPROC_NOOP
;
5950 nfsm_chain_get_32(error
, nmreq
, nfsvers
);// NFS Version Number
5952 if ((nfsvers
< NFS_VER2
) || (nfsvers
> NFS_VER3
)) {
5953 nd
->nd_repstat
= EPROGMISMATCH
;
5954 nd
->nd_procnum
= NFSPROC_NOOP
;
5957 nd
->nd_vers
= nfsvers
;
5958 nfsm_chain_get_32(error
, nmreq
, nd
->nd_procnum
);// NFS Procedure Number
5960 if ((nd
->nd_procnum
>= NFS_NPROCS
) ||
5961 ((nd
->nd_vers
== NFS_VER2
) && (nd
->nd_procnum
> NFSV2PROC_STATFS
))) {
5962 nd
->nd_repstat
= EPROCUNAVAIL
;
5963 nd
->nd_procnum
= NFSPROC_NOOP
;
5966 if (nfsvers
!= NFS_VER3
)
5967 nd
->nd_procnum
= nfsv3_procid
[nd
->nd_procnum
];
5968 nfsm_chain_get_32(error
, nmreq
, auth_type
); // Auth Flavor
5969 nfsm_chain_get_32(error
, nmreq
, len
); // Auth Length
5970 if (!error
&& (len
< 0 || len
> RPCAUTH_MAXSIZ
))
5974 /* Handle authentication */
5975 if (auth_type
== RPCAUTH_SYS
) {
5976 struct posix_cred temp_pcred
;
5977 if (nd
->nd_procnum
== NFSPROC_NULL
)
5979 nd
->nd_sec
= RPCAUTH_SYS
;
5980 nfsm_chain_adv(error
, nmreq
, NFSX_UNSIGNED
); // skip stamp
5981 nfsm_chain_get_32(error
, nmreq
, len
); // hostname length
5982 if (len
< 0 || len
> NFS_MAXNAMLEN
)
5984 nfsm_chain_adv(error
, nmreq
, nfsm_rndup(len
)); // skip hostname
5987 /* create a temporary credential using the bits from the wire */
5988 bzero(&temp_pcred
, sizeof(temp_pcred
));
5989 nfsm_chain_get_32(error
, nmreq
, user_id
);
5990 nfsm_chain_get_32(error
, nmreq
, group_id
);
5991 temp_pcred
.cr_groups
[0] = group_id
;
5992 nfsm_chain_get_32(error
, nmreq
, len
); // extra GID count
5993 if ((len
< 0) || (len
> RPCAUTH_UNIXGIDS
))
5996 for (i
= 1; i
<= len
; i
++)
5998 nfsm_chain_get_32(error
, nmreq
, temp_pcred
.cr_groups
[i
]);
6000 nfsm_chain_adv(error
, nmreq
, NFSX_UNSIGNED
);
6002 ngroups
= (len
>= NGROUPS
) ? NGROUPS
: (len
+ 1);
6004 nfsrv_group_sort(&temp_pcred
.cr_groups
[0], ngroups
);
6005 nfsm_chain_adv(error
, nmreq
, NFSX_UNSIGNED
); // verifier flavor (should be AUTH_NONE)
6006 nfsm_chain_get_32(error
, nmreq
, len
); // verifier length
6007 if (len
< 0 || len
> RPCAUTH_MAXSIZ
)
6010 nfsm_chain_adv(error
, nmreq
, nfsm_rndup(len
));
6012 /* request creation of a real credential */
6013 temp_pcred
.cr_uid
= user_id
;
6014 temp_pcred
.cr_ngroups
= ngroups
;
6015 nd
->nd_cr
= posix_cred_create(&temp_pcred
);
6016 if (nd
->nd_cr
== NULL
) {
6017 nd
->nd_repstat
= ENOMEM
;
6018 nd
->nd_procnum
= NFSPROC_NOOP
;
6021 } else if (auth_type
== RPCSEC_GSS
) {
6022 error
= nfs_gss_svc_cred_get(nd
, nmreq
);
6024 if (error
== EINVAL
)
6025 goto nfsmout
; // drop the request
6026 nd
->nd_repstat
= error
;
6027 nd
->nd_procnum
= NFSPROC_NOOP
;
6031 if (nd
->nd_procnum
== NFSPROC_NULL
) // assume it's AUTH_NONE
6033 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_REJECTCRED
);
6034 nd
->nd_procnum
= NFSPROC_NOOP
;
6039 if (IS_VALID_CRED(nd
->nd_cr
))
6040 kauth_cred_unref(&nd
->nd_cr
);
6041 nfsm_chain_cleanup(nmreq
);
6046 * Search for a sleeping nfsd and wake it up.
6047 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
6048 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
6049 * Note: Must be called with nfsd_mutex held.
6052 nfsrv_wakenfsd(struct nfsrv_sock
*slp
)
6056 if ((slp
->ns_flag
& SLP_VALID
) == 0)
6059 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
6060 /* if there's work to do on this socket, make sure it's queued up */
6061 if ((slp
->ns_flag
& SLP_WORKTODO
) && !(slp
->ns_flag
& SLP_QUEUED
)) {
6062 TAILQ_INSERT_TAIL(&nfsrv_sockwait
, slp
, ns_svcq
);
6063 slp
->ns_flag
|= SLP_WAITQ
;
6065 lck_rw_done(&slp
->ns_rwlock
);
6067 /* wake up a waiting nfsd, if possible */
6068 nd
= TAILQ_FIRST(&nfsd_queue
);
6072 TAILQ_REMOVE(&nfsd_queue
, nd
, nfsd_queue
);
6073 nd
->nfsd_flag
&= ~NFSD_WAITING
;
6077 #endif /* NFSSERVER */