/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>
#include <sys/acct.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/krpc.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int	nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

int	nfsrv_getstream(struct nfsrv_sock *, int);
int	nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

/*
 * compare two sockaddr structures
 */
int
nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
{
	if (!sa1)
		return (-1);
	if (!sa2)
		return (1);
	if (sa1->sa_family != sa2->sa_family)
		return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
	if (sa1->sa_len != sa2->sa_len)
		return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
	if (sa1->sa_family == AF_INET)
		return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
			&((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
	if (sa1->sa_family == AF_INET6)
		return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
			&((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
	return (-1);
}
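
/*
 * Note: the comparison above is memcmp()-style -- sockaddrs of different
 * family or length order on those fields first, and two AF_INET (or
 * AF_INET6) addresses reduce to a bcmp() of the raw sin_addr (sin6_addr)
 * bytes.  So, for example, comparing 10.0.0.1 with 10.0.0.2 yields a
 * negative result.
 */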

#if NFSCLIENT

int	nfs_reconnect(struct nfsmount *);
int	nfs_connect_setup(struct nfsmount *);
void	nfs_mount_sock_thread(void *, wait_result_t);
void	nfs_udp_rcv(socket_t, void *, int);
void	nfs_tcp_rcv(socket_t, void *, int);
void	nfs_sock_poke(struct nfsmount *);
void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
void	nfs_reqdequeue(struct nfsreq *);
void	nfs_reqbusy(struct nfsreq *);
struct nfsreq *nfs_reqnext(struct nfsreq *);
int	nfs_wait_reply(struct nfsreq *);
void	nfs_softterm(struct nfsreq *);
int	nfs_can_squish(struct nfsmount *);
int	nfs_is_squishy(struct nfsmount *);
int	nfs_is_dead(int, struct nfsmount *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
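
/*
 * Worked example of the NFS_RTO arithmetic (a sketch, assuming the usual
 * Van Jacobson scaling that the initialization in nfs_connect() suggests:
 * nm_srtt[] kept scaled by 8, nm_sdrtt[] by 4).  With a smoothed rtt A
 * (srtt = 8A) and mean deviation D (sdrtt = 4D), a getattr/lookup
 * (timer 1/2) gets:
 *	(((8A + 3) >> 2) + 4D + 1) >> 1  ~=  (2A + 4D) / 2  =  A + 2D
 * and a read/write (timer 3/4) gets:
 *	((8A + 7) >> 3) + 4D + 1  ~=  A + 4D
 * matching the A+2D / A+4D estimates described above.
 */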

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
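
/*
 * Scaled-arithmetic example (illustrative): with NFS_CWNDSCALE 256,
 * NFS_MAXCWND (8192) corresponds to 32 outstanding requests, each
 * request accounting for NFS_CWNDSCALE in nm_sent.  A retransmit
 * timeout halves nm_cwnd, and a reply received while a full window is
 * outstanding grows it by roughly NFS_CWNDSCALE^2 / cwnd -- i.e. about
 * 1/cwnd of a request slot, as the comment above describes.
 */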

/*
 * Increment location index to next address/server/location.
 */
void
nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
{
	uint8_t loc = nlip->nli_loc;
	uint8_t serv = nlip->nli_serv;
	uint8_t addr = nlip->nli_addr;

	/* move to next address */
	addr++;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
		/* no more addresses on current server, go to first address of next server */
next_server:
		addr = 0;
		serv++;
		if (serv >= nlp->nl_locations[loc]->nl_servcount) {
			/* no more servers on current location, go to first server of next location */
			serv = 0;
			loc++;
			if (loc >= nlp->nl_numlocs)
				loc = 0; /* after last location, wrap back around to first location */
		}
	}
	/*
	 * It's possible for this next server to not have any addresses.
	 * Check for that here and go to the next server.
	 * But bail out if we've managed to come back around to the original
	 * location that was passed in. (That would mean no servers had any
	 * addresses. And we don't want to spin here forever.)
	 */
	if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
		return;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
		goto next_server;

	nlip->nli_loc = loc;
	nlip->nli_serv = serv;
	nlip->nli_addr = addr;
}
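
/*
 * For example (illustrative): given two locations where location 0 has a
 * single server with two addresses, the index advances through
 * (location, server, address) tuples as
 *	(0,0,0) -> (0,0,1) -> (1,0,0) -> ... -> back to (0,0,0)
 * skipping any server whose ns_addrcount is zero along the way.
 */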

/*
 * Compare two location indices.
 */
int
nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
{
	if (nlip1->nli_loc != nlip2->nli_loc)
		return (nlip1->nli_loc - nlip2->nli_loc);
	if (nlip1->nli_serv != nlip2->nli_serv)
		return (nlip1->nli_serv - nlip2->nli_serv);
	return (nlip1->nli_addr - nlip2->nli_addr);
}

/*
 * Get the mntfromname (or path portion only) for a given location.
 */
void
nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
{
	struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
	char *p;
	int cnt, i;

	p = s;
	if (!pathonly) {
		cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name);
		p += cnt;
		size -= cnt;
	}
	if (fsl->nl_path.np_compcount == 0) {
		/* mounting root export on server */
		if (size > 0) {
			*p++ = '/';
			*p++ = '\0';
		}
		return;
	}
	/* append each server path component */
	for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
		cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
		p += cnt;
		size -= cnt;
	}
}
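
/*
 * For example (illustrative): a server "srv" exporting path components
 * { "vol", "home" } produces "srv:/vol/home" (or just "/vol/home" with
 * pathonly set), and a zero-component path produces the root export "/".
 */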

/*
 * NFS client connect socket upcall.
 * (Used only during socket connect/search.)
 */
void
nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_socket *nso = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0, recv = 1;

	if (nso->nso_flags & NSO_CONNECTING) {
		NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso));
		wakeup(nso->nso_wake);
		return;
	}

	lck_mtx_lock(&nso->nso_lock);
	if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
		NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso));
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso));
	nso->nso_flags |= NSO_UPCALL;

	/* loop while we make error-free progress */
	while (!error && recv) {
		/* make sure we're still interested in this socket */
		if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
			break;
		lck_mtx_unlock(&nso->nso_lock);
		m = NULL;
		if (nso->nso_sotype == SOCK_STREAM) {
			error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
		} else {
			rcvlen = 1000000;
			error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
			recv = m ? 1 : 0;
		}
		lck_mtx_lock(&nso->nso_lock);
		if (m) {
			/* match response with request */
			struct nfsm_chain nmrep;
			uint32_t reply = 0, rxid = 0, verf_type, verf_len;
			uint32_t reply_status, rejected_status, accepted_status;

			nfsm_chain_dissect_init(error, &nmrep, m);
			nfsm_chain_get_32(error, &nmrep, rxid);
			nfsm_chain_get_32(error, &nmrep, reply);
			if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
				error = EBADRPC;
			nfsm_chain_get_32(error, &nmrep, reply_status);
			if (!error && (reply_status == RPC_MSGDENIED)) {
				nfsm_chain_get_32(error, &nmrep, rejected_status);
				if (!error)
					error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			}
			nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
			nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
			nfsmout_if(error);
			if (verf_len)
				nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
			nfsm_chain_get_32(error, &nmrep, accepted_status);
			nfsmout_if(error);
			if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
				uint32_t minvers, maxvers;
				nfsm_chain_get_32(error, &nmrep, minvers);
				nfsm_chain_get_32(error, &nmrep, maxvers);
				nfsmout_if(error);
				if (nso->nso_protocol == PMAPPROG) {
					if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
						error = EPROGMISMATCH;
					else if ((nso->nso_saddr->sa_family == AF_INET) &&
						 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
						nso->nso_version = PMAPVERS;
					else if (nso->nso_saddr->sa_family == AF_INET6) {
						if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
							nso->nso_version = RPCBVERS4;
						else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
							nso->nso_version = RPCBVERS3;
					}
				} else if (nso->nso_protocol == NFS_PROG) {
					if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
						error = EPROGMISMATCH;
					else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
						nso->nso_version = NFS_VER3;
					else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
						nso->nso_version = NFS_VER2;
					else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
						nso->nso_version = NFS_VER4;
				}
				if (!error && nso->nso_version)
					accepted_status = RPC_SUCCESS;
			}
			if (!error) {
				switch (accepted_status) {
				case RPC_SUCCESS:
					error = 0;
					break;
				case RPC_PROGUNAVAIL:
					error = EPROGUNAVAIL;
					break;
				case RPC_PROGMISMATCH:
					error = EPROGMISMATCH;
					break;
				case RPC_PROCUNAVAIL:
					error = EPROCUNAVAIL;
					break;
				case RPC_GARBAGE:
					error = EBADRPC;
					break;
				case RPC_SYSTEM_ERR:
				default:
					error = EIO;
					break;
				}
			}
nfsmout:
			nso->nso_flags &= ~NSO_PINGING;
			if (error) {
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
			} else {
				nso->nso_flags |= NSO_VERIFIED;
			}
			mbuf_freem(m);
			/* wake up search thread */
			wakeup(nso->nso_wake);
			break;
		}
	}

	nso->nso_flags &= ~NSO_UPCALL;
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... */
		nso->nso_error = error ? error : EPIPE;
		nso->nso_flags |= NSO_DEAD;
		wakeup(nso->nso_wake);
	}
	if (nso->nso_flags & NSO_DISCONNECTING)
		wakeup(&nso->nso_flags);
	lck_mtx_unlock(&nso->nso_lock);
}

/*
 * Create/initialize an nfs_socket structure.
 */
int
nfs_socket_create(
	__unused struct nfsmount *nmp,
	struct sockaddr *sa,
	int sotype,
	in_port_t port,
	uint32_t protocol,
	uint32_t vers,
	int resvport,
	struct nfs_socket **nsop)
{
	struct nfs_socket *nso;
	struct timeval now;
	int error;
#ifdef NFS_SOCKET_DEBUGGING
	char naddr[MAX_IPv6_STR_LEN];
	void *sinaddr;

	if (sa->sa_family == AF_INET)
		sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
	else
		sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
	if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
		strlcpy(naddr, "<unknown>", sizeof(naddr));
#endif

	*nsop = NULL;

	/* Create the socket. */
	MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
	if (nso)
		MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
	if (!nso || !nso->nso_saddr) {
		if (nso)
			FREE(nso, M_TEMP);
		return (ENOMEM);
	}
	lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
	nso->nso_sotype = sotype;
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_init(&nso->nso_rrs);
	microuptime(&now);
	nso->nso_timestamp = now.tv_sec;
	bcopy(sa, nso->nso_saddr, sa->sa_len);
	if (sa->sa_family == AF_INET)
		((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
	else if (sa->sa_family == AF_INET6)
		((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
	nso->nso_protocol = protocol;
	nso->nso_version = vers;

	error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);

	/* Some servers require that the client port be a reserved port number. */
	if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
		struct sockaddr_storage ss;
		int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
		int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
		int portrange = IP_PORTRANGE_LOW;

		error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
		if (!error) {	/* bind now to check for failure */
			ss.ss_len = sa->sa_len;
			ss.ss_family = sa->sa_family;
			if (ss.ss_family == AF_INET) {
				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			} else if (ss.ss_family == AF_INET6) {
				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			} else {
				error = EINVAL;
			}
			if (!error)
				error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
		}
	}

	if (error) {
		NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
			resvport ? "r" : "", port, protocol, vers));
		nfs_socket_destroy(nso);
	} else {
		NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
			sotype, resvport ? "r" : "", port, protocol, vers));
		*nsop = nso;
	}
	return (error);
}

/*
 * Destroy an nfs_socket structure.
 */
void
nfs_socket_destroy(struct nfs_socket *nso)
{
	struct timespec ts = { 4, 0 };

	lck_mtx_lock(&nso->nso_lock);
	nso->nso_flags |= NSO_DISCONNECTING;
	if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
		msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
	lck_mtx_unlock(&nso->nso_lock);
	sock_shutdown(nso->nso_so, SHUT_RDWR);
	sock_close(nso->nso_so);
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_cleanup(&nso->nso_rrs);
	lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
	if (nso->nso_saddr)
		FREE(nso->nso_saddr, M_SONAME);
	if (nso->nso_saddr2)
		FREE(nso->nso_saddr2, M_SONAME);
	NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso));
	FREE(nso, M_TEMP);
}

/*
 * Set common socket options on an nfs_socket.
 */
void
nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
{
	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because most receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	struct timeval timeo;
	int on = 1, proto;

	timeo.tv_usec = 0;
	timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60;
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (nso->nso_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(nso->nso_so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}
	if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
	}
	/* set SO_NOADDRERR to detect network changes ASAP */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	/* just playin' it safe with upcalls */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	/* socket should be interruptible if the mount is */
	if (!NMFLAG(nmp, INTR))
		sock_nointerrupt(nso->nso_so, 1);
}

/*
 * Release resources held in an nfs_socket_search.
 */
void
nfs_socket_search_cleanup(struct nfs_socket_search *nss)
{
	struct nfs_socket *nso, *nsonext;

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
	}
	if (nss->nss_sock) {
		nfs_socket_destroy(nss->nss_sock);
		nss->nss_sock = NULL;
	}
}

/*
 * Prefer returning certain errors over others.
 * This function returns a ranking of the given error.
 */
int
nfs_connect_error_class(int error)
{
	switch (error) {
	case 0:
		return (0);
	case ETIMEDOUT:
	case EAGAIN:
		return (1);
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case EISCONN:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		return (2);
	case ERPCMISMATCH:
	case EPROCUNAVAIL:
	case EPROGMISMATCH:
	case EPROGUNAVAIL:
		return (3);
	case EBADRPC:
		return (4);
	default:
		return (5);
	}
}
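
/*
 * For example (illustrative): if one address fails with ECONNREFUSED
 * (class 2) and another with EPROGMISMATCH (class 3), the search reports
 * EPROGMISMATCH -- having actually spoken RPC to a server makes its error
 * more informative than a plain connection failure.
 */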

/*
 * Make sure a socket search returns the best error.
 */
void
nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
{
	if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
		nss->nss_error = error;
}

/*
 * Continue the socket search until we have something to report.
 */
int
nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
{
	struct nfs_socket *nso, *nsonext;
	struct timeval now;
	struct nfs_fs_location *fsl;
	struct nfs_fs_server *fss;
	struct sockaddr_storage ss;
	char *addrstr;
	int error, nomore = 0;

loop:
	microuptime(&now);
	NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec));

	/* Time to start another socket? */
	while ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
	       ((nss->nss_sockcnt < 4) && (now.tv_sec >= (nss->nss_last + 2)))) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			return (EINTR);
		/* Find the next address to try... */
		/* Have we run out of locations? */
		if (!nomore && (nss->nss_last != -1) && !nfs_location_index_cmp(&nss->nss_nextloc, &nss->nss_startloc))
			nomore = 1;
		if (nomore) {
			if (nss->nss_last < 0)
				nss->nss_last = now.tv_sec;
			break;
		}
		/* Can we convert the address to a sockaddr? */
		fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
		fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
		addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
		if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_last = -2;
			continue;
		}
		/* Check that socket family is acceptable. */
		if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_last = -2;
			continue;
		}

		/* Create the socket. */
		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
				nss->nss_port, nss->nss_protocol, nss->nss_version,
				((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
		if (error)
			return (error);

		nso->nso_location = nss->nss_nextloc;
		nso->nso_wake = nss;
		error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
		if (error) {
			lck_mtx_lock(&nso->nso_lock);
			nso->nso_error = error;
			nso->nso_flags |= NSO_DEAD;
			lck_mtx_unlock(&nso->nso_lock);
		}

		TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt++;
		nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);

		nss->nss_last = now.tv_sec;
	}

	/* check each active socket and try to push it along */
	TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
		lck_mtx_lock(&nso->nso_lock);
		if (!(nso->nso_flags & NSO_CONNECTED)) {
			if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
				/* no connection needed, just say it's already connected */
				nso->nso_flags |= NSO_CONNECTED;
				NFS_SOCK_DBG(("nfs connect %s UDP socket %p noconnect\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
			} else if (!(nso->nso_flags & NSO_CONNECTING)) {
				/* initiate the connection */
				nso->nso_flags |= NSO_CONNECTING;
				lck_mtx_unlock(&nso->nso_lock);
				NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
				error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
				lck_mtx_lock(&nso->nso_lock);
				if (error && (error != EINPROGRESS)) {
					nso->nso_error = error;
					nso->nso_flags |= NSO_DEAD;
					lck_mtx_unlock(&nso->nso_lock);
					continue;
				}
			}
			if (nso->nso_flags & NSO_CONNECTING) {
				/* check the connection */
				if (sock_isconnected(nso->nso_so)) {
					NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n",
						vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
					nso->nso_flags &= ~NSO_CONNECTING;
					nso->nso_flags |= NSO_CONNECTED;
				} else {
					int optlen = sizeof(error);
					error = 0;
					sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
					if (error) { /* we got an error on the socket */
						NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n",
							vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
						if (nss->nss_flags & NSS_VERBOSE)
							log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
								error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
						nso->nso_error = error;
						nso->nso_flags |= NSO_DEAD;
						lck_mtx_unlock(&nso->nso_lock);
						continue;
					}
				}
			}
			if (nso->nso_flags & NSO_CONNECTED)
				nfs_socket_options(nmp, nso);
		}
		if (!(nso->nso_flags & NSO_CONNECTED)) {
			lck_mtx_unlock(&nso->nso_lock);
			continue;
		}
		if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
		    ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
			/* initiate a NULL RPC request */
			uint64_t xid = nso->nso_pingxid;
			mbuf_t m, mreq = NULL;
			struct msghdr msg;
			size_t reqlen, sentlen;
			uint32_t vers;

			if (!(vers = nso->nso_version)) {
				if (nso->nso_protocol == PMAPPROG)
					vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
				else if (nso->nso_protocol == NFS_PROG)
					vers = NFS_VER3;
			}
			lck_mtx_unlock(&nso->nso_lock);
			error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
					vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
			lck_mtx_lock(&nso->nso_lock);
			if (!error) {
				nso->nso_flags |= NSO_PINGING;
				nso->nso_pingxid = R_XID32(xid);
				nso->nso_reqtimestamp = now.tv_sec;
				bzero(&msg, sizeof(msg));
				if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
					msg.msg_name = nso->nso_saddr;
					msg.msg_namelen = nso->nso_saddr->sa_len;
				}
				for (reqlen=0, m=mreq; m; m = mbuf_next(m))
					reqlen += mbuf_len(m);
				lck_mtx_unlock(&nso->nso_lock);
				error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
				NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
				lck_mtx_lock(&nso->nso_lock);
				if (!error && (sentlen != reqlen))
					error = ETIMEDOUT;
			}
			if (error) {
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
				lck_mtx_unlock(&nso->nso_lock);
				continue;
			}
		}
		if (nso->nso_flags & NSO_VERIFIED) {
			/* WOOHOO!! This socket looks good! */
			NFS_SOCK_DBG(("nfs connect %s socket %p verified\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
			if (!nso->nso_version) {
				/* If the version isn't set, the default must have worked. */
				if (nso->nso_protocol == PMAPPROG)
					nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
				if (nso->nso_protocol == NFS_PROG)
					nso->nso_version = NFS_VER3;
			}
			lck_mtx_unlock(&nso->nso_lock);
			TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
			nss->nss_sockcnt--;
			nss->nss_sock = nso;
			break;
		}
		lck_mtx_unlock(&nso->nso_lock);
	}

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		lck_mtx_lock(&nso->nso_lock);
		if (now.tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
			/* took too long */
			NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
			nso->nso_error = ETIMEDOUT;
			nso->nso_flags |= NSO_DEAD;
		}
		if (!(nso->nso_flags & NSO_DEAD)) {
			lck_mtx_unlock(&nso->nso_lock);
			continue;
		}
		lck_mtx_unlock(&nso->nso_lock);
		NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error));
		nfs_socket_search_update_error(nss, nso->nso_error);
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
		if (!nomore)
			nss->nss_last = -2;
	}

	/*
	 * Keep looping if we haven't found a socket yet and we have more
	 * sockets to (continue to) try.
	 */
	error = 0;
	if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || !nomore)) {
		/* log a warning if connect is taking a while */
		if (((now.tv_sec - nss->nss_timestamp) >= 30) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
			log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			nss->nss_flags |= NSS_WARNED;
		}
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			return (EINTR);
		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
			return (error);
		if (nss->nss_last >= 0)
			tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
		goto loop;
	}

	NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
	return (error);
}

/*
 * Initialize a new NFS connection.
 *
 * Search for a location to connect a socket to and initialize the connection.
 *
 * An NFS mount may have multiple locations/servers/addresses available.
 * We attempt to connect to each one asynchronously and will start
 * several sockets in parallel if other locations are slow to answer.
 * We'll use the first NFS socket we can successfully set up.
 *
 * The search may involve contacting the portmapper service first.
 *
 * A mount's initial connection may require negotiating some parameters such
 * as socket type and NFS version.
 */
int
nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
{
	struct nfs_socket_search nss;
	struct nfs_socket *nso, *nsonfs;
	struct sockaddr_storage ss;
	struct sockaddr *saddr, *oldsaddr;
	sock_upcall upcall;
	struct timeval now, start;
	int error, savederror, nfsvers;
	uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
	fhandle_t *fh = NULL;
	char *path = NULL;
	in_port_t port;

	/* paranoia... check that we have at least one address in the locations */
	uint32_t loc, serv;
	for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) {
		for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) {
			if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount)
				break;
			NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname,
				nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
		}
		if (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount)
			break;
	}
	if (loc >= nmp->nm_locations.nl_numlocs) {
		NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname));
		return (EINVAL);
	}

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	nmp->nm_nss = &nss;
	lck_mtx_unlock(&nmp->nm_lock);
	microuptime(&start);
	savederror = error = 0;

tryagain:
	/* initialize socket search state */
	bzero(&nss, sizeof(nss));
	nss.nss_error = savederror;
	TAILQ_INIT(&nss.nss_socklist);
	nss.nss_sotype = sotype;
	nss.nss_startloc = nmp->nm_locations.nl_current;
	nss.nss_timestamp = start.tv_sec;
	nss.nss_timeo = timeo;
	if (verbose)
		nss.nss_flags |= NSS_VERBOSE;

	/* First time connecting, we may need to negotiate some things */
	if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
		if (!nmp->nm_vers) {
			/* No NFS version specified... */
			if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
				/* ...connect to portmapper first if we (may) need any ports. */
				nss.nss_port = PMAPPORT;
				nss.nss_protocol = PMAPPROG;
				nss.nss_version = 0;
			} else {
				/* ...connect to NFS port first. */
				nss.nss_port = nmp->nm_nfsport;
				nss.nss_protocol = NFS_PROG;
				nss.nss_version = 0;
			}
		} else if (nmp->nm_vers >= NFS_VER4) {
			/* For NFSv4, we use the given (or default) port. */
			nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
			nss.nss_protocol = NFS_PROG;
			nss.nss_version = 4;
		} else {
			/* For NFSv3/v2... */
			if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
				/* ...connect to portmapper first if we need any ports. */
				nss.nss_port = PMAPPORT;
				nss.nss_protocol = PMAPPROG;
				nss.nss_version = 0;
			} else {
				/* ...connect to NFS port first. */
				nss.nss_port = nmp->nm_nfsport;
				nss.nss_protocol = NFS_PROG;
				nss.nss_version = nmp->nm_vers;
			}
		}
		NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
			nss.nss_protocol, nss.nss_version));
	} else {
		/* we've connected before, just connect to NFS port */
		if (!nmp->nm_nfsport) {
			/* need to ask portmapper which port that would be */
			nss.nss_port = PMAPPORT;
			nss.nss_protocol = PMAPPROG;
			nss.nss_version = 0;
		} else {
			nss.nss_port = nmp->nm_nfsport;
			nss.nss_protocol = NFS_PROG;
			nss.nss_version = nmp->nm_vers;
		}
		NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
			nss.nss_protocol, nss.nss_version));
	}

	/* Set next location to first valid location. */
	/* If start location is invalid, find next location. */
	nss.nss_nextloc = nss.nss_startloc;
	if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) ||
	    (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) {
		nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc);
		if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) {
			NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			return (ENOENT);
		}
	}
	nss.nss_last = -1;

keepsearching:

	error = nfs_connect_search_loop(nmp, &nss);
	if (error || !nss.nss_sock) {
		/* search failed */
		nfs_socket_search_cleanup(&nss);
		if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
			/* Try using UDP */
			sotype = SOCK_DGRAM;
			savederror = nss.nss_error;
			NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error));
			goto tryagain;
		}
		if (!error)
			error = nss.nss_error ? nss.nss_error : ETIMEDOUT;
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
		nmp->nm_nss = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		if (nss.nss_flags & NSS_WARNED)
			log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (fh)
			FREE(fh, M_TEMP);
		if (path)
			FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
		NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
		return (error);
	}

	/* try to use nss_sock */
	nso = nss.nss_sock;
	nss.nss_sock = NULL;

	/* We may be speaking to portmap first... to determine port(s). */
	if (nso->nso_saddr->sa_family == AF_INET)
		port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
	else
		port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
	if (port == PMAPPORT) {
		/* Use this portmapper port to get the port #s we need. */
		NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));

		/* remove the connect upcall so nfs_portmap_lookup() can use this socket */
		sock_setupcall(nso->nso_so, NULL, NULL);

		/* Set up socket address and port for NFS socket. */
		bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);

		/* If NFS version not set, try NFSv3 then NFSv2. */
		nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3;

		if (!(port = nmp->nm_nfsport)) {
			if (ss.ss_family == AF_INET)
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			else if (ss.ss_family == AF_INET6)
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
					nso->nso_so, NFS_PROG, nfsvers,
					(nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
			if (!error) {
				if (ss.ss_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
				else if (ss.ss_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
				if (!port)
					error = EPROGUNAVAIL;
			}
			if (error && !nmp->nm_vers) {
				nfsvers = NFS_VER2;
				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
						nso->nso_so, NFS_PROG, nfsvers,
						(nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
				if (!error) {
					if (ss.ss_family == AF_INET)
						port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
					else if (ss.ss_family == AF_INET6)
						port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
					if (!port)
						error = EPROGUNAVAIL;
				}
			}
			if (error) {
				nfs_socket_search_update_error(&nss, error);
				nfs_socket_destroy(nso);
				goto keepsearching;
			}
		}
		/* Create NFS protocol socket and add it to the list of sockets. */
		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
				NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
		if (error) {
			nfs_socket_search_update_error(&nss, error);
			nfs_socket_destroy(nso);
			goto keepsearching;
		}
		nsonfs->nso_location = nso->nso_location;
		nsonfs->nso_wake = &nss;
		error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs);
		if (error) {
			nfs_socket_search_update_error(&nss, error);
			nfs_socket_destroy(nsonfs);
			nfs_socket_destroy(nso);
			goto keepsearching;
		}
		TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link);
		nss.nss_sockcnt++;
		if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
			/* Set up socket address and port for MOUNT socket. */
			error = 0;
			bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
			port = nmp->nm_mountport;
			if (ss.ss_family == AF_INET)
				((struct sockaddr_in*)&ss)->sin_port = htons(port);
			else if (ss.ss_family == AF_INET6)
				((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
			if (!port) {
				/* Get port/sockaddr for MOUNT version corresponding to NFS version. */
				/* If NFS version is unknown, optimistically choose for NFSv3. */
				int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
				int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
						nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo);
			}
			if (!error) {
				if (ss.ss_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
				else if (ss.ss_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
				if (!port)
					error = EPROGUNAVAIL;
			}
			/* create sockaddr for MOUNT */
			if (!error)
				MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO);
			if (!error && !nsonfs->nso_saddr2)
				error = ENOMEM;
			if (!error)
				bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len);
			if (error) {
				lck_mtx_lock(&nsonfs->nso_lock);
				nsonfs->nso_error = error;
				nsonfs->nso_flags |= NSO_DEAD;
				lck_mtx_unlock(&nsonfs->nso_lock);
			}
		}
		nfs_socket_destroy(nso);
		goto keepsearching;
	}

	/* nso is an NFS socket */
	NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));

	/* If NFS version wasn't specified, it was determined during the connect. */
	nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version;

	/* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
	if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
		error = 0;
		saddr = nso->nso_saddr2;
		if (!saddr) {
			/* Need sockaddr for MOUNT port */
			bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
			port = nmp->nm_mountport;
			if (ss.ss_family == AF_INET)
				((struct sockaddr_in*)&ss)->sin_port = htons(port);
			else if (ss.ss_family == AF_INET6)
				((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
			if (!port) {
				/* Get port/sockaddr for MOUNT version corresponding to NFS version. */
				int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
				int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
						NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
				if (ss.ss_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
				else if (ss.ss_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
			}
			if (!error) {
				if (port)
					saddr = (struct sockaddr*)&ss;
				else
					error = EPROGUNAVAIL;
			}
		}
		if (saddr)
			MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
		if (saddr && fh)
			MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		if (!saddr || !fh || !path) {
			if (!error)
				error = ENOMEM;
			if (fh)
				FREE(fh, M_TEMP);
			if (path)
				FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
			fh = NULL;
			path = NULL;
			nfs_socket_search_update_error(&nss, error);
			nfs_socket_destroy(nso);
			goto keepsearching;
		}
		nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1);
		error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers,
				path, vfs_context_current(), timeo, fh, &nmp->nm_servsec);
		NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
		if (!error) {
			/* Make sure we can agree on a security flavor. */
			int o, s;	/* indices into mount option and server security flavor lists */
			int found = 0;

			if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) {
				/* Some servers return an empty list to indicate RPCAUTH_SYS? */
				nmp->nm_servsec.count = 1;
				nmp->nm_servsec.flavors[0] = RPCAUTH_SYS;
			}
			if (nmp->nm_sec.count) {
				/* Choose the first flavor in our list that the server supports. */
				if (!nmp->nm_servsec.count) {
					/* we don't know what the server supports, just use our first choice */
					nmp->nm_auth = nmp->nm_sec.flavors[0];
					found = 1;
				}
				for (o=0; !found && (o < nmp->nm_sec.count); o++)
					for (s=0; !found && (s < nmp->nm_servsec.count); s++)
						if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) {
							nmp->nm_auth = nmp->nm_sec.flavors[o];
							found = 1;
						}
			} else {
				/* Choose the first one we support from the server's list. */
				if (!nmp->nm_servsec.count) {
					nmp->nm_auth = RPCAUTH_SYS;
					found = 1;
				}
				for (s=0; s < nmp->nm_servsec.count; s++)
					switch (nmp->nm_servsec.flavors[s]) {
					case RPCAUTH_SYS:
						/* prefer RPCAUTH_SYS to RPCAUTH_NONE */
						if (found && (nmp->nm_auth == RPCAUTH_NONE))
							found = 0;
						/* FALLTHROUGH */
					case RPCAUTH_NONE:
					case RPCAUTH_KRB5:
					case RPCAUTH_KRB5I:
					case RPCAUTH_KRB5P:
						if (!found) {
							nmp->nm_auth = nmp->nm_servsec.flavors[s];
							found = 1;
						}
						break;
					}
			}
			error = !found ? EAUTH : 0;
		}
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
		path = NULL;
		if (error) {
			nfs_socket_search_update_error(&nss, error);
			FREE(fh, M_TEMP);
			fh = NULL;
			nfs_socket_destroy(nso);
			goto keepsearching;
		}
		if (nmp->nm_fh)
			FREE(nmp->nm_fh, M_TEMP);
		nmp->nm_fh = fh;
		fh = NULL;
		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
	}

	/* put the real upcall in place */
	upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	error = sock_setupcall(nso->nso_so, upcall, nmp);
	if (error) {
		nfs_socket_search_update_error(&nss, error);
		nfs_socket_destroy(nso);
		goto keepsearching;
	}

	if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
		/* set mntfromname to this location */
		if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
			nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname,
				sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0);
		/* some negotiated values need to remain unchanged for the life of the mount */
		if (!nmp->nm_sotype)
			nmp->nm_sotype = nso->nso_sotype;
		if (!nmp->nm_vers) {
			nmp->nm_vers = nfsvers;
			/* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
			if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
				/* read back the port we actually connected to (host order) */
				if (nso->nso_saddr->sa_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
				else if (nso->nso_saddr->sa_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
				else
					port = 0;
1363 if (port == NFS_PORT)
1364 nmp->nm_nfsport = NFS_PORT;
1365 }
1366 }
1367 /* do some version-specific pre-mount set up */
1368 if (nmp->nm_vers >= NFS_VER4) {
1369 microtime(&now);
1370 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
1371 if (!NMFLAG(nmp, NOCALLBACK))
1372 nfs4_mount_callback_setup(nmp);
1373 }
1374 }
1375
1376 /* Initialize NFS socket state variables */
1377 lck_mtx_lock(&nmp->nm_lock);
1378 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
1379 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
1380 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
1381 nmp->nm_sdrtt[3] = 0;
1382 if (nso->nso_sotype == SOCK_DGRAM) {
1383 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
1384 nmp->nm_sent = 0;
1385 } else if (nso->nso_sotype == SOCK_STREAM) {
1386 nmp->nm_timeouts = 0;
1387 }
1388 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1389 nmp->nm_sockflags |= NMSOCK_SETUP;
1390 /* move the socket to the mount structure */
1391 nmp->nm_nso = nso;
1392 oldsaddr = nmp->nm_saddr;
1393 nmp->nm_saddr = nso->nso_saddr;
1394 lck_mtx_unlock(&nmp->nm_lock);
1395 error = nfs_connect_setup(nmp);
1396 lck_mtx_lock(&nmp->nm_lock);
1397 nmp->nm_sockflags &= ~NMSOCK_SETUP;
1398 if (!error) {
1399 nmp->nm_sockflags |= NMSOCK_READY;
1400 wakeup(&nmp->nm_sockflags);
1401 }
1402 if (error) {
1403 NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n",
1404 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
1405 nfs_socket_search_update_error(&nss, error);
1406 nmp->nm_saddr = oldsaddr;
1407 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1408 /* undo settings made prior to setup */
1409 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE))
1410 nmp->nm_sotype = 0;
1411 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) {
1412 if (nmp->nm_vers >= NFS_VER4) {
1413 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT))
1414 nmp->nm_nfsport = 0;
1415 if (nmp->nm_cbid)
1416 nfs4_mount_callback_shutdown(nmp);
1417 if (IS_VALID_CRED(nmp->nm_mcred))
1418 kauth_cred_unref(&nmp->nm_mcred);
1419 bzero(&nmp->nm_un, sizeof(nmp->nm_un));
1420 }
1421 nmp->nm_vers = 0;
1422 }
1423 }
1424 lck_mtx_unlock(&nmp->nm_lock);
1425 nmp->nm_nso = NULL;
1426 nfs_socket_destroy(nso);
1427 goto keepsearching;
1428 }
1429
1430 /* update current location */
1431 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) &&
1432 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) {
1433 /* server has changed, we should initiate failover/recovery */
1434 // XXX
1435 }
1436 nmp->nm_locations.nl_current = nso->nso_location;
1437 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID;
1438
1439 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1440 /* We have now successfully connected... make a note of it. */
1441 nmp->nm_sockflags |= NMSOCK_HASCONNECTED;
1442 }
1443
1444 lck_mtx_unlock(&nmp->nm_lock);
1445 if (oldsaddr)
1446 FREE(oldsaddr, M_SONAME);
1447
1448 if (nss.nss_flags & NSS_WARNED)
1449 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n",
1450 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1451
1452 nmp->nm_nss = NULL;
1453 nfs_socket_search_cleanup(&nss);
1454 if (fh)
1455 FREE(fh, M_TEMP);
1456 if (path)
1457 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1458 NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
1459 return (0);
1460 }
1461
1462
1463 /* setup & confirm socket connection is functional */
1464 int
1465 nfs_connect_setup(struct nfsmount *nmp)
1466 {
1467 int error = 0;
1468
1469 if (nmp->nm_vers >= NFS_VER4) {
1470 if (nmp->nm_state & NFSSTA_CLIENTID) {
1471 /* first, try to renew our current state */
1472 error = nfs4_renew(nmp, R_SETUP);
1473 if ((error == NFSERR_ADMIN_REVOKED) ||
1474 (error == NFSERR_CB_PATH_DOWN) ||
1475 (error == NFSERR_EXPIRED) ||
1476 (error == NFSERR_LEASE_MOVED) ||
1477 (error == NFSERR_STALE_CLIENTID)) {
1478 lck_mtx_lock(&nmp->nm_lock);
1479 nfs_need_recover(nmp, error);
1480 lck_mtx_unlock(&nmp->nm_lock);
1481 }
1482 }
1483 error = nfs4_setclientid(nmp);
1484 }
1485 return (error);
1486 }
1487
1488 /*
1489 * NFS socket reconnect routine:
1490 * Called when a connection is broken.
1491 * - disconnect the old socket
1492 * - nfs_connect() again
1493 * - set R_MUSTRESEND for all outstanding requests on mount point
1494 * If this fails the mount point is DEAD!
1495 */
1496 int
1497 nfs_reconnect(struct nfsmount *nmp)
1498 {
1499 struct nfsreq *rq;
1500 struct timeval now;
1501 thread_t thd = current_thread();
1502 int error, wentdown = 0, verbose = 1;
1503 time_t lastmsg;
1504 int timeo;
1505
1506 microuptime(&now);
1507 lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);
1508
1509 nfs_disconnect(nmp);
1510
1511
1512 lck_mtx_lock(&nmp->nm_lock);
1513 timeo = nfs_is_squishy(nmp) ? 8 : 30;
1514 lck_mtx_unlock(&nmp->nm_lock);
1515
1516 while ((error = nfs_connect(nmp, verbose, timeo))) {
1517 verbose = 0;
1518 nfs_disconnect(nmp);
1519 if ((error == EINTR) || (error == ERESTART))
1520 return (EINTR);
1521 if (error == EIO)
1522 return (EIO);
1523 microuptime(&now);
1524 if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
1525 lastmsg = now.tv_sec;
1526 nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
1527 wentdown = 1;
1528 }
1529 lck_mtx_lock(&nmp->nm_lock);
1530 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
1531 /* we're not yet completely mounted and */
1532 /* we can't reconnect, so we fail */
1533 lck_mtx_unlock(&nmp->nm_lock);
1534 return (error);
1535 }
1536 nfs_mount_check_dead_timeout(nmp);
1537 if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
1538 lck_mtx_unlock(&nmp->nm_lock);
1539 return (error);
1540 }
1541 lck_mtx_unlock(&nmp->nm_lock);
1542 tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
1543 if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1544 return (error);
1545 }
1546
1547 if (wentdown)
1548 nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");
1549
1550 /*
1551 * Loop through outstanding request list and mark all requests
1552 * as needing a resend. (Though nfs_need_reconnect() probably
1553 * marked them all already.)
1554 */
1555 lck_mtx_lock(nfs_request_mutex);
1556 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1557 if (rq->r_nmp == nmp) {
1558 lck_mtx_lock(&rq->r_mtx);
1559 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1560 rq->r_flags |= R_MUSTRESEND;
1561 rq->r_rtt = -1;
1562 wakeup(rq);
1563 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
1564 nfs_asyncio_resend(rq);
1565 }
1566 lck_mtx_unlock(&rq->r_mtx);
1567 }
1568 }
1569 lck_mtx_unlock(nfs_request_mutex);
1570 return (0);
1571 }
1572
1573 /*
1574 * NFS disconnect. Clean up and unlink.
1575 */
1576 void
1577 nfs_disconnect(struct nfsmount *nmp)
1578 {
1579 struct nfs_socket *nso;
1580
1581 lck_mtx_lock(&nmp->nm_lock);
1582 tryagain:
1583 if (nmp->nm_nso) {
1584 struct timespec ts = { 1, 0 };
1585 if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
1586 nmp->nm_state |= NFSSTA_WANTSND;
1587 msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
1588 goto tryagain;
1589 }
1590 if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
1591 msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
1592 goto tryagain;
1593 }
1594 nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
1595 nmp->nm_sockflags &= ~NMSOCK_READY;
1596 nso = nmp->nm_nso;
1597 nmp->nm_nso = NULL;
1598 if (nso->nso_saddr == nmp->nm_saddr)
1599 nso->nso_saddr = NULL;
1600 lck_mtx_unlock(&nmp->nm_lock);
1601 nfs_socket_destroy(nso);
1602 lck_mtx_lock(&nmp->nm_lock);
1603 nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
1604 lck_mtx_unlock(&nmp->nm_lock);
1605 } else {
1606 lck_mtx_unlock(&nmp->nm_lock);
1607 }
1608 }
1609
1610 /*
1611 * mark an NFS mount as needing a reconnect/resends.
1612 */
1613 void
1614 nfs_need_reconnect(struct nfsmount *nmp)
1615 {
1616 struct nfsreq *rq;
1617
1618 lck_mtx_lock(&nmp->nm_lock);
1619 nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
1620 lck_mtx_unlock(&nmp->nm_lock);
1621
1622 /*
1623 * Loop through outstanding request list and
1624 * mark all requests as needing a resend.
1625 */
1626 lck_mtx_lock(nfs_request_mutex);
1627 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1628 if (rq->r_nmp == nmp) {
1629 lck_mtx_lock(&rq->r_mtx);
1630 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1631 rq->r_flags |= R_MUSTRESEND;
1632 rq->r_rtt = -1;
1633 wakeup(rq);
1634 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
1635 nfs_asyncio_resend(rq);
1636 }
1637 lck_mtx_unlock(&rq->r_mtx);
1638 }
1639 }
1640 lck_mtx_unlock(nfs_request_mutex);
1641 }
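/*
 * For reference: nfs_reconnect() and nfs_need_reconnect() share the same
 * resend-marking loop over nfs_reqq.  A minimal sketch of that loop factored
 * into one helper is below; nfs_mark_requests_for_resend() is a hypothetical
 * name (not an xnu symbol) and the sketch assumes the nfsreq/nfsmount
 * definitions used throughout this file.
 */
#if 0 /* illustrative sketch only; not compiled */
static void
nfs_mark_requests_for_resend(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp != nmp)
			continue;
		lck_mtx_lock(&rq->r_mtx);
		/* skip requests that already failed, completed, or are marked */
		if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
			rq->r_flags |= R_MUSTRESEND;
			rq->r_rtt = -1;
			wakeup(rq);
			if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
				nfs_asyncio_resend(rq);
		}
		lck_mtx_unlock(&rq->r_mtx);
	}
	lck_mtx_unlock(nfs_request_mutex);
}
#endif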
1642
1643
1644 /*
1645 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
1646 */
1647 void
1648 nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
1649 {
1650 struct nfsmount *nmp = arg;
1651 struct timespec ts = { 30, 0 };
1652 thread_t thd = current_thread();
1653 struct nfsreq *req;
1654 struct timeval now;
1655 int error, dofinish;
1656 nfsnode_t np;
1657
1658 lck_mtx_lock(&nmp->nm_lock);
1659
1660 while (!(nmp->nm_sockflags & NMSOCK_READY) ||
1661 !TAILQ_EMPTY(&nmp->nm_resendq) ||
1662 !LIST_EMPTY(&nmp->nm_monlist) ||
1663 nmp->nm_deadto_start ||
1664 (nmp->nm_state & NFSSTA_RECOVER) ||
1665 ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
1666 {
1667 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
1668 break;
1669 /* do reconnect, if necessary */
1670 if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) {
1671 if (nmp->nm_reconnect_start <= 0) {
1672 microuptime(&now);
1673 nmp->nm_reconnect_start = now.tv_sec;
1674 }
1675 lck_mtx_unlock(&nmp->nm_lock);
1676 NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
1677 if (nfs_reconnect(nmp) == 0)
1678 nmp->nm_reconnect_start = 0;
1679 lck_mtx_lock(&nmp->nm_lock);
1680 }
1681 if ((nmp->nm_sockflags & NMSOCK_READY) &&
1682 (nmp->nm_state & NFSSTA_RECOVER) &&
1683 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
1684 !(nmp->nm_state & NFSSTA_FORCE)) {
1685 /* perform state recovery */
1686 lck_mtx_unlock(&nmp->nm_lock);
1687 nfs_recover(nmp);
1688 lck_mtx_lock(&nmp->nm_lock);
1689 }
1690 /* handle NFSv4 delegation returns */
1691 while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) &&
1692 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
1693 ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
1694 lck_mtx_unlock(&nmp->nm_lock);
1695 nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
1696 lck_mtx_lock(&nmp->nm_lock);
1697 }
1698 /* do resends, if necessary/possible */
1699 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) &&
1700 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
1701 if (req->r_resendtime)
1702 microuptime(&now);
1703 while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime))
1704 req = TAILQ_NEXT(req, r_rchain);
1705 if (!req)
1706 break;
1707 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
1708 req->r_rchain.tqe_next = NFSREQNOLIST;
1709 lck_mtx_unlock(&nmp->nm_lock);
1710 lck_mtx_lock(&req->r_mtx);
1711 if (req->r_error || req->r_nmrep.nmc_mhead) {
1712 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1713 req->r_flags &= ~R_RESENDQ;
1714 wakeup(req);
1715 lck_mtx_unlock(&req->r_mtx);
1716 if (dofinish)
1717 nfs_asyncio_finish(req);
1718 lck_mtx_lock(&nmp->nm_lock);
1719 continue;
1720 }
1721 if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
1722 req->r_flags &= ~R_RESTART;
1723 req->r_resendtime = 0;
1724 lck_mtx_unlock(&req->r_mtx);
1725 /* async RPCs on GSS mounts need to be rebuilt and resent. */
1726 nfs_reqdequeue(req);
1727 if (nfs_request_using_gss(req)) {
1728 nfs_gss_clnt_rpcdone(req);
1729 error = nfs_gss_clnt_args_restore(req);
1730 if (error == ENEEDAUTH)
1731 req->r_xid = 0;
1732 }
1733 NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
1734 nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
1735 req->r_flags, req->r_rtt));
1736 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */
1737 if (!error)
1738 error = nfs_sigintr(nmp, req, req->r_thread, 0);
1739 if (!error)
1740 error = nfs_request_add_header(req);
1741 if (!error)
1742 error = nfs_request_send(req, 0);
1743 lck_mtx_lock(&req->r_mtx);
1744 if (req->r_flags & R_RESENDQ)
1745 req->r_flags &= ~R_RESENDQ;
1746 if (error)
1747 req->r_error = error;
1748 wakeup(req);
1749 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1750 lck_mtx_unlock(&req->r_mtx);
1751 if (dofinish)
1752 nfs_asyncio_finish(req);
1753 lck_mtx_lock(&nmp->nm_lock);
1754 error = 0;
1755 continue;
1756 }
1757 NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
1758 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
1759 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */
1760 if (!error)
1761 error = nfs_sigintr(nmp, req, req->r_thread, 0);
1762 if (!error) {
1763 req->r_flags |= R_SENDING;
1764 lck_mtx_unlock(&req->r_mtx);
1765 error = nfs_send(req, 0);
1766 lck_mtx_lock(&req->r_mtx);
1767 if (!error) {
1768 if (req->r_flags & R_RESENDQ)
1769 req->r_flags &= ~R_RESENDQ;
1770 wakeup(req);
1771 lck_mtx_unlock(&req->r_mtx);
1772 lck_mtx_lock(&nmp->nm_lock);
1773 continue;
1774 }
1775 }
1776 req->r_error = error;
1777 if (req->r_flags & R_RESENDQ)
1778 req->r_flags &= ~R_RESENDQ;
1779 wakeup(req);
1780 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1781 lck_mtx_unlock(&req->r_mtx);
1782 if (dofinish)
1783 nfs_asyncio_finish(req);
1784 lck_mtx_lock(&nmp->nm_lock);
1785 }
1786 if (nmp->nm_deadto_start)
1787 nfs_mount_check_dead_timeout(nmp);
1788 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))
1789 break;
1790 /* check monitored nodes, if necessary/possible */
1791 if (!LIST_EMPTY(&nmp->nm_monlist)) {
1792 nmp->nm_state |= NFSSTA_MONITOR_SCAN;
1793 LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
1794 if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
1795 break;
1796 np->n_mflag |= NMMONSCANINPROG;
1797 lck_mtx_unlock(&nmp->nm_lock);
1798 error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR));
1799 if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */
1800 nfs_data_update_size(np, 0);
1801 lck_mtx_lock(&nmp->nm_lock);
1802 np->n_mflag &= ~NMMONSCANINPROG;
1803 if (np->n_mflag & NMMONSCANWANT) {
1804 np->n_mflag &= ~NMMONSCANWANT;
1805 wakeup(&np->n_mflag);
1806 }
1807 if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
1808 break;
1809 }
1810 nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
1811 if (nmp->nm_state & NFSSTA_UNMOUNTING)
1812 wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
1813 }
1814 if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) {
1815 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
1816 (nmp->nm_state & NFSSTA_RECOVER))
1817 ts.tv_sec = 1;
1818 else
1819 ts.tv_sec = 5;
1820 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
1821 }
1822 }
1823
1824 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */
1825 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
1826 (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
1827 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
1828 lck_mtx_unlock(&nmp->nm_lock);
1829 nfs3_umount_rpc(nmp, vfs_context_kernel(),
1830 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
1831 lck_mtx_lock(&nmp->nm_lock);
1832 }
1833
1834 if (nmp->nm_sockthd == thd)
1835 nmp->nm_sockthd = NULL;
1836 lck_mtx_unlock(&nmp->nm_lock);
1837 wakeup(&nmp->nm_sockthd);
1838 thread_terminate(thd);
1839 }
1840
1841 /* start or wake a mount's socket thread */
1842 void
1843 nfs_mount_sock_thread_wake(struct nfsmount *nmp)
1844 {
1845 if (nmp->nm_sockthd)
1846 wakeup(&nmp->nm_sockthd);
1847 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
1848 thread_deallocate(nmp->nm_sockthd);
1849 }
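/*
 * Note on the start/deallocate pattern above: kernel_thread_start() returns
 * holding an extra reference on the new thread, which the caller drops with
 * thread_deallocate(); the thread itself exits via thread_terminate() as at
 * the end of nfs_mount_sock_thread().  A minimal sketch (my_thread_func and
 * arg are placeholder names):
 */
#if 0 /* illustrative sketch only; not compiled */
	thread_t thd;

	if (kernel_thread_start(my_thread_func, arg, &thd) == KERN_SUCCESS)
		thread_deallocate(thd); /* drop the start reference; the thread keeps running */
#endif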
1850
1851 /*
1852 * Check if we should mark the mount dead because the
1853 * unresponsive mount has reached the dead timeout.
1854 * (must be called with nmp locked)
1855 */
1856 void
1857 nfs_mount_check_dead_timeout(struct nfsmount *nmp)
1858 {
1859 struct timeval now;
1860
1861 if (nmp->nm_deadto_start == 0)
1862 return;
1863 if (nmp->nm_state & NFSSTA_DEAD)
1864 return;
1865 nfs_is_squishy(nmp);
1866 if (nmp->nm_curdeadtimeout <= 0)
1867 return;
1868 microuptime(&now);
1869 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout)
1870 return;
1871 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1872 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
1873 nmp->nm_state |= NFSSTA_DEAD;
1874 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
1875 }
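/*
 * Worked example of the check above: with nm_deadto_start == 100 (uptime in
 * seconds when the mount first went unresponsive) and nm_curdeadtimeout == 60,
 * the mount is declared dead once microuptime() reaches 160 seconds.  A
 * "squishy" mount (see nfs_is_squishy()) may be running with a shortened
 * nm_curdeadtimeout, which is what the "squished" variant of the log message
 * indicates.
 */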
1876
1877 /*
1878 * NFS callback channel socket state
1879 */
1880 struct nfs_callback_socket
1881 {
1882 TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
1883 socket_t ncbs_so; /* the socket */
1884 struct sockaddr_storage ncbs_saddr; /* socket address */
1885 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */
1886 time_t ncbs_stamp; /* last accessed at */
1887 uint32_t ncbs_flags; /* see below */
1888 };
1889 #define NCBSOCK_UPCALL 0x0001
1890 #define NCBSOCK_UPCALLWANT 0x0002
1891 #define NCBSOCK_DEAD 0x0004
1892
1893 /*
1894 * NFS callback channel state
1895 *
1896 * One listening socket for accepting socket connections from servers and
1897 * a list of connected sockets to handle callback requests on.
1898 * Mounts registered with the callback channel are assigned IDs and
1899 * put on a list so that the callback request handling code can match
1900 * the requests up with mounts.
1901 */
1902 socket_t nfs4_cb_so = NULL;
1903 socket_t nfs4_cb_so6 = NULL;
1904 in_port_t nfs4_cb_port = 0;
1905 in_port_t nfs4_cb_port6 = 0;
1906 uint32_t nfs4_cb_id = 0;
1907 uint32_t nfs4_cb_so_usecount = 0;
1908 TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
1909 TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;
1910
1911 int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
1912
1913 /*
1914 * Set up the callback channel for the NFS mount.
1915 *
1916 * Initializes the callback channel socket state and
1917 * assigns a callback ID to the mount.
1918 */
1919 void
1920 nfs4_mount_callback_setup(struct nfsmount *nmp)
1921 {
1922 struct sockaddr_in sin;
1923 struct sockaddr_in6 sin6;
1924 socket_t so = NULL;
1925 socket_t so6 = NULL;
1926 struct timeval timeo;
1927 int error, on = 1;
1928 in_port_t port;
1929
1930 lck_mtx_lock(nfs_global_mutex);
1931 if (nfs4_cb_id == 0) {
1932 TAILQ_INIT(&nfs4_cb_mounts);
1933 TAILQ_INIT(&nfs4_cb_socks);
1934 nfs4_cb_id++;
1935 }
1936 nmp->nm_cbid = nfs4_cb_id++;
1937 if (nmp->nm_cbid == 0)
1938 nmp->nm_cbid = nfs4_cb_id++;
1939 nfs4_cb_so_usecount++;
1940 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
1941
1942 if (nfs4_cb_so) {
1943 lck_mtx_unlock(nfs_global_mutex);
1944 return;
1945 }
1946
1947 /* IPv4 */
1948 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
1949 if (error) {
1950 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
1951 goto fail;
1952 }
1953 so = nfs4_cb_so;
1954
1955 sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
1956 sin.sin_len = sizeof(struct sockaddr_in);
1957 sin.sin_family = AF_INET;
1958 sin.sin_addr.s_addr = htonl(INADDR_ANY);
1959 sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
1960 error = sock_bind(so, (struct sockaddr *)&sin);
1961 if (error) {
1962 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
1963 goto fail;
1964 }
1965 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
1966 if (error) {
1967 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
1968 goto fail;
1969 }
1970 nfs4_cb_port = ntohs(sin.sin_port);
1971
1972 error = sock_listen(so, 32);
1973 if (error) {
1974 log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
1975 goto fail;
1976 }
1977
1978 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
1979 timeo.tv_usec = 0;
1980 timeo.tv_sec = 60;
1981 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
1982 if (error)
1983 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
1984 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1985 if (error)
1986 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
1987 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
1988 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
1989 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
1990 error = 0;
1991
1992 /* IPv6 */
1993 error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
1994 if (error) {
1995 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
1996 goto fail;
1997 }
1998 so6 = nfs4_cb_so6;
1999
2000 sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2001 sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
2002 /* try to use specified port or same port as IPv4 */
2003 port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
2004 ipv6_bind_again:
2005 sin6.sin6_len = sizeof(struct sockaddr_in6);
2006 sin6.sin6_family = AF_INET6;
2007 sin6.sin6_addr = in6addr_any;
2008 sin6.sin6_port = htons(port);
2009 error = sock_bind(so6, (struct sockaddr *)&sin6);
2010 if (error) {
2011 if (port != nfs_callback_port) {
2012 /* if we simply tried to match the IPv4 port, then try any port */
2013 port = 0;
2014 goto ipv6_bind_again;
2015 }
2016 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
2017 goto fail;
2018 }
2019 error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
2020 if (error) {
2021 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
2022 goto fail;
2023 }
2024 nfs4_cb_port6 = ntohs(sin6.sin6_port);
2025
2026 error = sock_listen(so6, 32);
2027 if (error) {
2028 log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
2029 goto fail;
2030 }
2031
2032 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2033 timeo.tv_usec = 0;
2034 timeo.tv_sec = 60;
2035 error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2036 if (error)
2037 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
2038 error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2039 if (error)
2040 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
2041 sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2042 sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2043 sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2044 error = 0;
2045
2046 fail:
2047 if (error) {
2048 nfs4_cb_so = nfs4_cb_so6 = NULL;
2049 lck_mtx_unlock(nfs_global_mutex);
2050 if (so) {
2051 sock_shutdown(so, SHUT_RDWR);
2052 sock_close(so);
2053 }
2054 if (so6) {
2055 sock_shutdown(so6, SHUT_RDWR);
2056 sock_close(so6);
2057 }
2058 } else {
2059 lck_mtx_unlock(nfs_global_mutex);
2060 }
2061 }
2062
2063 /*
2064 * Shut down the callback channel for the NFS mount.
2065 *
2066 * Clears the mount's callback ID and releases the mount's
2067 * reference on the callback socket. Dropping the last reference
2068 * also shuts down the callback socket(s).
2069 */
2070 void
2071 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
2072 {
2073 struct nfs_callback_socket *ncbsp;
2074 socket_t so, so6;
2075 struct nfs4_cb_sock_list cb_socks;
2076 struct timespec ts = {1,0};
2077
2078 lck_mtx_lock(nfs_global_mutex);
2079 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
2080 /* wait for any callbacks in progress to complete */
2081 while (nmp->nm_cbrefs)
2082 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
2083 nmp->nm_cbid = 0;
2084 if (--nfs4_cb_so_usecount) {
2085 lck_mtx_unlock(nfs_global_mutex);
2086 return;
2087 }
2088 so = nfs4_cb_so;
2089 so6 = nfs4_cb_so6;
2090 nfs4_cb_so = nfs4_cb_so6 = NULL;
2091 TAILQ_INIT(&cb_socks);
2092 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
2093 lck_mtx_unlock(nfs_global_mutex);
2094 if (so) {
2095 sock_shutdown(so, SHUT_RDWR);
2096 sock_close(so);
2097 }
2098 if (so6) {
2099 sock_shutdown(so6, SHUT_RDWR);
2100 sock_close(so6);
2101 }
2102 while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
2103 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
2104 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2105 sock_close(ncbsp->ncbs_so);
2106 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2107 FREE(ncbsp, M_TEMP);
2108 }
2109 }
2110
2111 /*
2112 * Check periodically for stale/unused nfs callback sockets
2113 */
2114 #define NFS4_CB_TIMER_PERIOD 30
2115 #define NFS4_CB_IDLE_MAX 300
2116 void
2117 nfs4_callback_timer(__unused void *param0, __unused void *param1)
2118 {
2119 struct nfs_callback_socket *ncbsp, *nextncbsp;
2120 struct timeval now;
2121
2122 loop:
2123 lck_mtx_lock(nfs_global_mutex);
2124 if (TAILQ_EMPTY(&nfs4_cb_socks)) {
2125 nfs4_callback_timer_on = 0;
2126 lck_mtx_unlock(nfs_global_mutex);
2127 return;
2128 }
2129 microuptime(&now);
2130 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
2131 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
2132 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
2133 continue;
2134 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
2135 lck_mtx_unlock(nfs_global_mutex);
2136 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2137 sock_close(ncbsp->ncbs_so);
2138 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2139 FREE(ncbsp, M_TEMP);
2140 goto loop;
2141 }
2142 nfs4_callback_timer_on = 1;
2143 nfs_interval_timer_start(nfs4_callback_timer_call,
2144 NFS4_CB_TIMER_PERIOD * 1000);
2145 lck_mtx_unlock(nfs_global_mutex);
2146 }
2147
2148 /*
2149 * Accept a new callback socket.
2150 */
2151 void
2152 nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
2153 {
2154 socket_t newso = NULL;
2155 struct nfs_callback_socket *ncbsp;
2156 struct nfsmount *nmp;
2157 struct timeval timeo, now;
2158 int error, on = 1, ip;
2159
2160 if (so == nfs4_cb_so)
2161 ip = 4;
2162 else if (so == nfs4_cb_so6)
2163 ip = 6;
2164 else
2165 return;
2166
2167 /* allocate/initialize a new nfs_callback_socket */
2168 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
2169 if (!ncbsp) {
2170 log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
2171 return;
2172 }
2173 bzero(ncbsp, sizeof(*ncbsp));
2174 ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
2175 nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);
2176
2177 /* accept a new socket */
2178 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
2179 ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
2180 nfs4_cb_rcv, ncbsp, &newso);
2181 if (error) {
2182 log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
2183 FREE(ncbsp, M_TEMP);
2184 return;
2185 }
2186
2187 /* set up the new socket */
2188 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2189 timeo.tv_usec = 0;
2190 timeo.tv_sec = 60;
2191 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2192 if (error)
2193 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
2194 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2195 if (error)
2196 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
2197 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2198 sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2199 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2200 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2201
2202 ncbsp->ncbs_so = newso;
2203 microuptime(&now);
2204 ncbsp->ncbs_stamp = now.tv_sec;
2205
2206 lck_mtx_lock(nfs_global_mutex);
2207
2208 /* add it to the list */
2209 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
2210
2211 /* verify it's from a host we have mounted */
2212 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2213 /* check if socket's source address matches this mount's server address */
2214 if (!nmp->nm_saddr)
2215 continue;
2216 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2217 break;
2218 }
2219 if (!nmp) /* we don't want this socket, mark it dead */
2220 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2221
2222 /* make sure the callback socket cleanup timer is running */
2223 /* (shorten the timer if we've got a socket we don't want) */
2224 if (!nfs4_callback_timer_on) {
2225 nfs4_callback_timer_on = 1;
2226 nfs_interval_timer_start(nfs4_callback_timer_call,
2227 !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
2228 } else if (!nmp && (nfs4_callback_timer_on < 2)) {
2229 nfs4_callback_timer_on = 2;
2230 thread_call_cancel(nfs4_callback_timer_call);
2231 nfs_interval_timer_start(nfs4_callback_timer_call, 500);
2232 }
2233
2234 lck_mtx_unlock(nfs_global_mutex);
2235 }
2236
2237 /*
2238 * Receive mbufs from callback sockets into RPC records and process each record.
2239 * Detect when the connection has been closed and shut the socket down.
2240 */
2241 void
2242 nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
2243 {
2244 struct nfs_callback_socket *ncbsp = arg;
2245 struct timespec ts = {1,0};
2246 struct timeval now;
2247 mbuf_t m;
2248 int error = 0, recv = 1;
2249
2250 lck_mtx_lock(nfs_global_mutex);
2251 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
2252 /* wait if upcall is already in progress */
2253 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
2254 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
2255 }
2256 ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
2257 lck_mtx_unlock(nfs_global_mutex);
2258
2259 /* loop while we make error-free progress */
2260 while (!error && recv) {
2261 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
2262 if (m) /* handle the request */
2263 error = nfs4_cb_handler(ncbsp, m);
2264 }
2265
2266 /* note: no error and no data indicates the server closed its end */
2267 if ((error != EWOULDBLOCK) && (error || !recv)) {
2268 /*
2269 * Socket is either being closed or should be.
2270 * We can't close the socket in the context of the upcall.
2271 * So we mark it as dead and leave it for the cleanup timer to reap.
2272 */
2273 ncbsp->ncbs_stamp = 0;
2274 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2275 } else {
2276 microuptime(&now);
2277 ncbsp->ncbs_stamp = now.tv_sec;
2278 }
2279
2280 lck_mtx_lock(nfs_global_mutex);
2281 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
2282 lck_mtx_unlock(nfs_global_mutex);
2283 wakeup(ncbsp);
2284 }
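/*
 * The NCBSOCK_UPCALL/NCBSOCK_UPCALLWANT handshake above is the classic
 * sleep/wakeup way to serialize upcalls on one socket: a flag bit acts as
 * the lock, msleep() atomically drops the mutex while waiting, and wakeup()
 * releases waiters.  A minimal sketch of the shape (obj, lock, FLAG_BUSY,
 * and ts are placeholders, not xnu symbols):
 */
#if 0 /* illustrative sketch only; not compiled */
	lck_mtx_lock(lock);
	while (obj->flags & FLAG_BUSY)
		msleep(obj, lock, PSOCK, "busywait", &ts); /* sleep until the holder finishes */
	obj->flags |= FLAG_BUSY; /* we are now the exclusive holder */
	lck_mtx_unlock(lock);

	/* ... do the work without the mutex held ... */

	lck_mtx_lock(lock);
	obj->flags &= ~FLAG_BUSY;
	lck_mtx_unlock(lock);
	wakeup(obj); /* let the next waiter in */
#endif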
2285
2286 /*
2287 * Handle an NFS callback channel request.
2288 */
2289 int
2290 nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
2291 {
2292 socket_t so = ncbsp->ncbs_so;
2293 struct nfsm_chain nmreq, nmrep;
2294 mbuf_t mhead = NULL, mrest = NULL, m;
2295 struct msghdr msg;
2296 struct nfsmount *nmp;
2297 fhandle_t fh;
2298 nfsnode_t np;
2299 nfs_stateid stateid;
2300 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
2301 uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
2302 uint32_t auth_type, auth_len;
2303 uint32_t numres, *pnumres;
2304 int error = 0, replen, len;
2305 size_t sentlen = 0;
2306
2307 xid = numops = op = status = procnum = taglen = cbid = 0;
2308
2309 nfsm_chain_dissect_init(error, &nmreq, mreq);
2310 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID
2311 nfsm_chain_get_32(error, &nmreq, val); // RPC Call
2312 nfsm_assert(error, (val == RPC_CALL), EBADRPC);
2313 nfsm_chain_get_32(error, &nmreq, val); // RPC Version
2314 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
2315 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number
2316 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
2317 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number
2318 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
2319 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number
2320 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
2321
2322 /* Handle authentication */
2323 /* XXX just ignore auth for now - handling kerberos may be tricky */
2324 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor
2325 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length
2326 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2327 if (!error && (auth_len > 0))
2328 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2329 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
2330 nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length
2331 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2332 if (!error && (auth_len > 0))
2333 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2334 if (error) {
2335 status = error;
2336 error = 0;
2337 goto nfsmout;
2338 }
2339
2340 switch (procnum) {
2341 case NFSPROC4_CB_NULL:
2342 status = NFSERR_RETVOID;
2343 break;
2344 case NFSPROC4_CB_COMPOUND:
2345 /* tag, minorversion, cb ident, numops, op array */
2346 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */
2347 nfsm_assert(error, (taglen <= NFS4_OPAQUE_LIMIT), EBADRPC);
2348
2349 /* start building the body of the response */
2350 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
2351 nfsm_chain_init(&nmrep, mrest);
2352
2353 /* copy tag from request to response */
2354 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */
2355 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
2356 nfsm_chain_get_32(error, &nmreq, val);
2357 nfsm_chain_add_32(error, &nmrep, val);
2358 }
2359
2360 /* insert number of results placeholder */
2361 numres = 0;
2362 nfsm_chain_add_32(error, &nmrep, numres);
2363 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
2364
2365 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */
2366 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
2367 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */
2368 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */
2369 if (error) {
2370 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
2371 status = error;
2372 else if ((error == ENOBUFS) || (error == ENOMEM))
2373 status = NFSERR_RESOURCE;
2374 else
2375 status = NFSERR_SERVERFAULT;
2376 error = 0;
2377 nfsm_chain_null(&nmrep);
2378 goto nfsmout;
2379 }
2380 /* match the callback ID to a registered mount */
2381 lck_mtx_lock(nfs_global_mutex);
2382 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2383 if (nmp->nm_cbid != cbid)
2384 continue;
2385 /* verify socket's source address matches this mount's server address */
2386 if (!nmp->nm_saddr)
2387 continue;
2388 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2389 break;
2390 }
2391 /* mark the NFS mount as busy */
2392 if (nmp)
2393 nmp->nm_cbrefs++;
2394 lck_mtx_unlock(nfs_global_mutex);
2395 if (!nmp) {
2396 /* if no mount match, just drop socket. */
2397 error = EPERM;
2398 nfsm_chain_null(&nmrep);
2399 goto out;
2400 }
2401
2402 /* process ops, adding results to mrest */
2403 while (numops > 0) {
2404 numops--;
2405 nfsm_chain_get_32(error, &nmreq, op);
2406 if (error)
2407 break;
2408 switch (op) {
2409 case NFS_OP_CB_GETATTR:
2410 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
2411 np = NULL;
2412 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2413 bmlen = NFS_ATTR_BITMAP_LEN;
2414 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
2415 if (error) {
2416 status = error;
2417 error = 0;
2418 numops = 0; /* don't process any more ops */
2419 } else {
2420 /* find the node for the file handle */
2421 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2422 if (error || !np) {
2423 status = NFSERR_BADHANDLE;
2424 error = 0;
2425 np = NULL;
2426 numops = 0; /* don't process any more ops */
2427 }
2428 }
2429 nfsm_chain_add_32(error, &nmrep, op);
2430 nfsm_chain_add_32(error, &nmrep, status);
2431 if (!error && (status == EBADRPC))
2432 error = status;
2433 if (np) {
2434 /* only allow returning size, change, and mtime attrs */
2435 NFS_CLEAR_ATTRIBUTES(&rbitmap);
2436 attrbytes = 0;
2437 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
2438 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
2439 attrbytes += 2 * NFSX_UNSIGNED;
2440 }
2441 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
2442 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
2443 attrbytes += 2 * NFSX_UNSIGNED;
2444 }
2445 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2446 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
2447 attrbytes += 3 * NFSX_UNSIGNED;
2448 }
2449 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
2450 nfsm_chain_add_32(error, &nmrep, attrbytes);
2451 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
2452 nfsm_chain_add_64(error, &nmrep,
2453 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
2454 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
2455 nfsm_chain_add_64(error, &nmrep, np->n_size);
2456 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2457 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
2458 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
2459 }
2460 nfs_node_unlock(np);
2461 vnode_put(NFSTOV(np));
2462 np = NULL;
2463 }
2464 /*
2465 * If we hit an error building the reply, we can't easily back up.
2466 * So we'll just update the status and hope the server ignores the
2467 * extra garbage.
2468 */
2469 break;
2470 case NFS_OP_CB_RECALL:
2471 // (STATEID, TRUNCATE, FH) -> (STATUS)
2472 np = NULL;
2473 nfsm_chain_get_stateid(error, &nmreq, &stateid);
2474 nfsm_chain_get_32(error, &nmreq, truncate);
2475 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2476 if (error) {
2477 status = error;
2478 error = 0;
2479 numops = 0; /* don't process any more ops */
2480 } else {
2481 /* find the node for the file handle */
2482 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2483 if (error || !np) {
2484 status = NFSERR_BADHANDLE;
2485 error = 0;
2486 np = NULL;
2487 numops = 0; /* don't process any more ops */
2488 } else if (!(np->n_openflags & N_DELEG_MASK) ||
2489 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
2490 /* delegation stateid state doesn't match */
2491 status = NFSERR_BAD_STATEID;
2492 numops = 0; /* don't process any more ops */
2493 }
2494 if (!status) /* add node to recall queue, and wake socket thread */
2495 nfs4_delegation_return_enqueue(np);
2496 if (np) {
2497 nfs_node_unlock(np);
2498 vnode_put(NFSTOV(np));
2499 }
2500 }
2501 nfsm_chain_add_32(error, &nmrep, op);
2502 nfsm_chain_add_32(error, &nmrep, status);
2503 if (!error && (status == EBADRPC))
2504 error = status;
2505 break;
2506 case NFS_OP_CB_ILLEGAL:
2507 default:
2508 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
2509 status = NFSERR_OP_ILLEGAL;
2510 nfsm_chain_add_32(error, &nmrep, status);
2511 numops = 0; /* don't process any more ops */
2512 break;
2513 }
2514 numres++;
2515 }
2516
2517 if (!status && error) {
2518 if (error == EBADRPC)
2519 status = error;
2520 else if ((error == ENOBUFS) || (error == ENOMEM))
2521 status = NFSERR_RESOURCE;
2522 else
2523 status = NFSERR_SERVERFAULT;
2524 error = 0;
2525 }
2526
2527 /* Now, set the numres field */
2528 *pnumres = txdr_unsigned(numres);
2529 nfsm_chain_build_done(error, &nmrep);
2530 nfsm_chain_null(&nmrep);
2531
2532 /* drop the callback reference on the mount */
2533 lck_mtx_lock(nfs_global_mutex);
2534 nmp->nm_cbrefs--;
2535 if (!nmp->nm_cbid)
2536 wakeup(&nmp->nm_cbrefs);
2537 lck_mtx_unlock(nfs_global_mutex);
2538 break;
2539 }
2540
2541 nfsmout:
2542 if (status == EBADRPC)
2543 OSAddAtomic64(1, &nfsstats.rpcinvalid);
2544
2545 /* build reply header */
2546 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
2547 nfsm_chain_init(&nmrep, mhead);
2548 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
2549 nfsm_chain_add_32(error, &nmrep, xid);
2550 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2551 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
2552 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2553 if (status & NFSERR_AUTHERR) {
2554 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2555 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
2556 } else {
2557 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2558 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2559 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2560 }
2561 } else {
2562 /* reply status */
2563 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2564 /* XXX RPCAUTH_NULL verifier */
2565 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2566 nfsm_chain_add_32(error, &nmrep, 0);
2567 /* accepted status */
2568 switch (status) {
2569 case EPROGUNAVAIL:
2570 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2571 break;
2572 case EPROGMISMATCH:
2573 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2574 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2575 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2576 break;
2577 case EPROCUNAVAIL:
2578 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2579 break;
2580 case EBADRPC:
2581 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2582 break;
2583 default:
2584 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2585 if (status != NFSERR_RETVOID)
2586 nfsm_chain_add_32(error, &nmrep, status);
2587 break;
2588 }
2589 }
2590 nfsm_chain_build_done(error, &nmrep);
2591 if (error) {
2592 nfsm_chain_null(&nmrep);
2593 goto out;
2594 }
2595 error = mbuf_setnext(nmrep.nmc_mcur, mrest);
2596 if (error) {
2597 printf("nfs cb: mbuf_setnext failed %d\n", error);
2598 goto out;
2599 }
2600 mrest = NULL;
2601 /* Calculate the size of the reply */
2602 replen = 0;
2603 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
2604 replen += mbuf_len(m);
2605 mbuf_pkthdr_setlen(mhead, replen);
2606 error = mbuf_pkthdr_setrcvif(mhead, NULL);
2607 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
2608 nfsm_chain_null(&nmrep);
2609
2610 /* send the reply */
2611 bzero(&msg, sizeof(msg));
2612 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
2613 mhead = NULL;
2614 if (!error && ((int)sentlen != replen))
2615 error = EWOULDBLOCK;
2616 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
2617 error = ETIMEDOUT;
2618 out:
2619 if (error)
2620 nfsm_chain_cleanup(&nmrep);
2621 if (mhead)
2622 mbuf_freem(mhead);
2623 if (mrest)
2624 mbuf_freem(mrest);
2625 if (mreq)
2626 mbuf_freem(mreq);
2627 return (error);
2628 }
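/*
 * For reference, the accepted-success RPC reply built above has this on-the-
 * wire layout (all fields 32-bit XDR; the record mark is TCP-only and
 * carries the reply length with the high bit set to flag the last fragment):
 *
 *	record mark:	(replen - 4) | 0x80000000
 *	xid		(copied from the request)
 *	RPC_REPLY
 *	RPC_MSGACCEPTED
 *	RPCAUTH_NULL, 0	(verifier flavor and length)
 *	RPC_SUCCESS
 *	status		(unless NFSERR_RETVOID)
 *	...results from mrest, appended via mbuf_setnext()...
 */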
2629
2630
2631 /*
2632 * Initialize an nfs_rpc_record_state structure.
2633 */
2634 void
2635 nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
2636 {
2637 bzero(nrrsp, sizeof(*nrrsp));
2638 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
2639 }
2640
2641 /*
2642 * Clean up an nfs_rpc_record_state structure.
2643 */
2644 void
2645 nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
2646 {
2647 if (nrrsp->nrrs_m) {
2648 mbuf_freem(nrrsp->nrrs_m);
2649 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2650 }
2651 }
2652
2653 /*
2654 * Read the next (marked) RPC record from the socket.
2655 *
2656 * *recvp returns whether any data was received.
2657 * *mp returns the next complete RPC record.
2658 */
2659 int
2660 nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
2661 {
2662 struct iovec aio;
2663 struct msghdr msg;
2664 size_t rcvlen;
2665 int error = 0;
2666 mbuf_t m;
2667
2668 *recvp = 0;
2669 *mp = NULL;
2670
2671 /* read the TCP RPC record marker */
2672 while (!error && nrrsp->nrrs_markerleft) {
2673 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
2674 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
2675 aio.iov_len = nrrsp->nrrs_markerleft;
2676 bzero(&msg, sizeof(msg));
2677 msg.msg_iov = &aio;
2678 msg.msg_iovlen = 1;
2679 error = sock_receive(so, &msg, flags, &rcvlen);
2680 if (error || !rcvlen)
2681 break;
2682 *recvp = 1;
2683 nrrsp->nrrs_markerleft -= rcvlen;
2684 if (nrrsp->nrrs_markerleft)
2685 continue;
2686 /* record marker complete */
2687 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
2688 if (nrrsp->nrrs_fragleft & 0x80000000) {
2689 nrrsp->nrrs_lastfrag = 1;
2690 nrrsp->nrrs_fragleft &= ~0x80000000;
2691 }
2692 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
2693 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
2694 /* This is SERIOUS! We are out of sync with the sender. */
2695 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
2696 error = EFBIG;
2697 }
2698 }
2699
2700 /* read the TCP RPC record fragment */
2701 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
2702 m = NULL;
2703 rcvlen = nrrsp->nrrs_fragleft;
2704 error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
2705 if (error || !rcvlen || !m)
2706 break;
2707 *recvp = 1;
2708 /* append mbufs to list */
2709 nrrsp->nrrs_fragleft -= rcvlen;
2710 if (!nrrsp->nrrs_m) {
2711 nrrsp->nrrs_m = m;
2712 } else {
2713 error = mbuf_setnext(nrrsp->nrrs_mlast, m);
2714 if (error) {
2715 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
2716 mbuf_freem(m);
2717 break;
2718 }
2719 }
2720 while (mbuf_next(m))
2721 m = mbuf_next(m);
2722 nrrsp->nrrs_mlast = m;
2723 }
2724
2725 /* done reading fragment? */
2726 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
2727 /* reset socket fragment parsing state */
2728 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
2729 if (nrrsp->nrrs_lastfrag) {
2730 /* RPC record complete */
2731 *mp = nrrsp->nrrs_m;
2732 /* reset socket record parsing state */
2733 nrrsp->nrrs_reclen = 0;
2734 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2735 nrrsp->nrrs_lastfrag = 0;
2736 }
2737 }
2738
2739 return (error);
2740 }
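/*
 * The reader above implements standard TCP record marking (see the Record
 * Marking Standard in RFC 5531): each fragment is preceded by a 4-byte
 * big-endian length whose high bit flags the final fragment of the record.
 * A minimal sketch of composing a single-fragment record mark, the inverse
 * of the parsing above ("len" is a placeholder for the payload length):
 */
#if 0 /* illustrative sketch only; not compiled */
	uint32_t marker = htonl(0x80000000 | (uint32_t)len); /* last-fragment bit + length */
	/* transmit the 4-byte marker immediately followed by len bytes of RPC message */
#endif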
2741
2742
2743
2744 /*
2745 * The NFS client send routine.
2746 *
2747 * Send the given NFS request out the mount's socket.
2748 * Holds nfs_sndlock() for the duration of this call.
2749 *
2750 * - check for request termination (sigintr)
2751 * - wait for reconnect, if necessary
2752 * - UDP: check the congestion window
2753 * - make a copy of the request to send
2754 * - UDP: update the congestion window
2755 * - send the request
2756 *
2757 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
2758 * rexmit count is also updated if this isn't the first send.
2759 *
2760 * If the send is not successful, make sure R_MUSTRESEND is set.
2761 * If this wasn't the first transmit, set R_RESENDERR.
2762 * Also, undo any UDP congestion window changes made.
2763 *
2764 * If the error appears to indicate that the socket should
2765 * be reconnected, mark the socket for reconnection.
2766 *
2767 * Only return errors when the request should be aborted.
2768 */
2769 int
2770 nfs_send(struct nfsreq *req, int wait)
2771 {
2772 struct nfsmount *nmp;
2773 struct nfs_socket *nso;
2774 int error, error2, sotype, rexmit, slpflag = 0, needrecon;
2775 struct msghdr msg;
2776 struct sockaddr *sendnam;
2777 mbuf_t mreqcopy;
2778 size_t sentlen = 0;
2779 struct timespec ts = { 2, 0 };
2780
2781 again:
2782 error = nfs_sndlock(req);
2783 if (error) {
2784 lck_mtx_lock(&req->r_mtx);
2785 req->r_error = error;
2786 req->r_flags &= ~R_SENDING;
2787 lck_mtx_unlock(&req->r_mtx);
2788 return (error);
2789 }
2790
2791 error = nfs_sigintr(req->r_nmp, req, NULL, 0);
2792 if (error) {
2793 nfs_sndunlock(req);
2794 lck_mtx_lock(&req->r_mtx);
2795 req->r_error = error;
2796 req->r_flags &= ~R_SENDING;
2797 lck_mtx_unlock(&req->r_mtx);
2798 return (error);
2799 }
2800 nmp = req->r_nmp;
2801 sotype = nmp->nm_sotype;
2802
2803 /*
2804 * If it's a setup RPC but we're not in SETUP... must need reconnect.
2805 * If it's a recovery RPC but the socket's not ready... must need reconnect.
2806 */
2807 if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
2808 ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
2809 error = ETIMEDOUT;
2810 nfs_sndunlock(req);
2811 lck_mtx_lock(&req->r_mtx);
2812 req->r_error = error;
2813 req->r_flags &= ~R_SENDING;
2814 lck_mtx_unlock(&req->r_mtx);
2815 return (error);
2816 }
2817
2818 /* If the socket needs reconnection, do that now. */
2819 /* wait until socket is ready - unless this request is part of setup */
2820 lck_mtx_lock(&nmp->nm_lock);
2821 if (!(nmp->nm_sockflags & NMSOCK_READY) &&
2822 !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
2823 if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
2824 slpflag |= PCATCH;
2825 lck_mtx_unlock(&nmp->nm_lock);
2826 nfs_sndunlock(req);
2827 if (!wait) {
2828 lck_mtx_lock(&req->r_mtx);
2829 req->r_flags &= ~R_SENDING;
2830 req->r_flags |= R_MUSTRESEND;
2831 req->r_rtt = 0;
2832 lck_mtx_unlock(&req->r_mtx);
2833 return (0);
2834 }
2835 NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
2836 lck_mtx_lock(&req->r_mtx);
2837 req->r_flags &= ~R_MUSTRESEND;
2838 req->r_rtt = 0;
2839 lck_mtx_unlock(&req->r_mtx);
2840 lck_mtx_lock(&nmp->nm_lock);
2841 while (!(nmp->nm_sockflags & NMSOCK_READY)) {
2842 /* don't bother waiting if the socket thread won't be reconnecting it */
2843 if (nmp->nm_state & NFSSTA_FORCE) {
2844 error = EIO;
2845 break;
2846 }
2847 if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) {
2848 struct timeval now;
2849 microuptime(&now);
2850 if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
2851 /* soft mount in reconnect for a while... terminate ASAP */
2852 OSAddAtomic64(1, &nfsstats.rpctimeouts);
2853 req->r_flags |= R_SOFTTERM;
2854 req->r_error = error = ETIMEDOUT;
2855 break;
2856 }
2857 }
2858 /* make sure socket thread is running, then wait */
2859 nfs_mount_sock_thread_wake(nmp);
2860 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
2861 break;
2862 msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
2863 slpflag = 0;
2864 }
2865 lck_mtx_unlock(&nmp->nm_lock);
2866 if (error) {
2867 lck_mtx_lock(&req->r_mtx);
2868 req->r_error = error;
2869 req->r_flags &= ~R_SENDING;
2870 lck_mtx_unlock(&req->r_mtx);
2871 return (error);
2872 }
2873 goto again;
2874 }
2875 nso = nmp->nm_nso;
2876 /* note that we're using the mount's socket to do the send */
2877 nmp->nm_state |= NFSSTA_SENDING; /* will be cleared by nfs_sndunlock() */
2878 lck_mtx_unlock(&nmp->nm_lock);
2879 if (!nso) {
2880 nfs_sndunlock(req);
2881 lck_mtx_lock(&req->r_mtx);
2882 req->r_flags &= ~R_SENDING;
2883 req->r_flags |= R_MUSTRESEND;
2884 req->r_rtt = 0;
2885 lck_mtx_unlock(&req->r_mtx);
2886 return (0);
2887 }
2888
2889 lck_mtx_lock(&req->r_mtx);
2890 rexmit = (req->r_flags & R_SENT);
2891
2892 if (sotype == SOCK_DGRAM) {
2893 lck_mtx_lock(&nmp->nm_lock);
2894 if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
2895 /* if we can't send this out yet, wait on the cwnd queue */
2896 slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
2897 lck_mtx_unlock(&nmp->nm_lock);
2898 nfs_sndunlock(req);
2899 req->r_flags &= ~R_SENDING;
2900 req->r_flags |= R_MUSTRESEND;
2901 lck_mtx_unlock(&req->r_mtx);
2902 if (!wait) {
2903 req->r_rtt = 0;
2904 return (0);
2905 }
2906 lck_mtx_lock(&nmp->nm_lock);
2907 while (nmp->nm_sent >= nmp->nm_cwnd) {
2908 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
2909 break;
2910 TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
2911 msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
2912 slpflag = 0;
2913 if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
2914 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
2915 req->r_cchain.tqe_next = NFSREQNOLIST;
2916 }
2917 }
2918 lck_mtx_unlock(&nmp->nm_lock);
2919 goto again;
2920 }
2921 /*
2922 * We update these *before* the send to avoid racing
2923 * against others who may be looking to send requests.
2924 */
2925 if (!rexmit) {
2926 /* first transmit */
2927 req->r_flags |= R_CWND;
2928 nmp->nm_sent += NFS_CWNDSCALE;
2929 } else {
2930 /*
2931 * When retransmitting, turn timing off
2932 * and divide congestion window by 2.
2933 */
2934 req->r_flags &= ~R_TIMING;
2935 nmp->nm_cwnd >>= 1;
2936 if (nmp->nm_cwnd < NFS_CWNDSCALE)
2937 nmp->nm_cwnd = NFS_CWNDSCALE;
2938 }
2939 lck_mtx_unlock(&nmp->nm_lock);
2940 }
2941
2942 req->r_flags &= ~R_MUSTRESEND;
2943 lck_mtx_unlock(&req->r_mtx);
2944
2945 error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
2946 wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
2947 if (error) {
2948 if (wait)
2949 log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
2950 nfs_sndunlock(req);
2951 lck_mtx_lock(&req->r_mtx);
2952 req->r_flags &= ~R_SENDING;
2953 req->r_flags |= R_MUSTRESEND;
2954 req->r_rtt = 0;
2955 lck_mtx_unlock(&req->r_mtx);
2956 return (0);
2957 }
2958
2959 bzero(&msg, sizeof(msg));
2960 if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
2961 msg.msg_name = (caddr_t)sendnam;
2962 msg.msg_namelen = sendnam->sa_len;
2963 }
2964 error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
2965 #ifdef NFS_SOCKET_DEBUGGING
2966 if (error || (sentlen != req->r_mreqlen))
2967 NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
2968 req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
2969 #endif
2970 if (!error && (sentlen != req->r_mreqlen))
2971 error = EWOULDBLOCK;
2972 needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));
2973
2974 lck_mtx_lock(&req->r_mtx);
2975 req->r_flags &= ~R_SENDING;
2976 req->r_rtt = 0;
2977 if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
2978 req->r_rexmit = NFS_MAXREXMIT;
2979
2980 if (!error) {
2981 /* SUCCESS */
2982 req->r_flags &= ~R_RESENDERR;
2983 if (rexmit)
2984 OSAddAtomic64(1, &nfsstats.rpcretries);
2985 req->r_flags |= R_SENT;
2986 if (req->r_flags & R_WAITSENT) {
2987 req->r_flags &= ~R_WAITSENT;
2988 wakeup(req);
2989 }
2990 nfs_sndunlock(req);
2991 lck_mtx_unlock(&req->r_mtx);
2992 return (0);
2993 }
2994
2995 /* send failed */
2996 req->r_flags |= R_MUSTRESEND;
2997 if (rexmit)
2998 req->r_flags |= R_RESENDERR;
2999 if ((error == EINTR) || (error == ERESTART))
3000 req->r_error = error;
3001 lck_mtx_unlock(&req->r_mtx);
3002
3003 if (sotype == SOCK_DGRAM) {
3004 /*
3005 * Note: even though a first send may fail, we consider
3006 * the request sent for congestion window purposes.
3007 * So we don't need to undo any of the changes made above.
3008 */
3009 /*
3010 * Should socket errors be ignored for connectionless sockets?
3011 * For now, ignore them all.
3012 */
3013 if ((error != EINTR) && (error != ERESTART) &&
3014 (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
3015 int clearerror = 0, optlen = sizeof(clearerror);
3016 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
3017 #ifdef NFS_SOCKET_DEBUGGING
3018 if (clearerror)
3019 NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
3020 error, clearerror));
3021 #endif
3022 }
3023 }
3024
3025 /* check if it appears we should reconnect the socket */
3026 switch (error) {
3027 case EWOULDBLOCK:
3028 /* if send timed out, reconnect if on TCP */
3029 if (sotype != SOCK_STREAM)
3030 break;
3031 case EPIPE:
3032 case EADDRNOTAVAIL:
3033 case ENETDOWN:
3034 case ENETUNREACH:
3035 case ENETRESET:
3036 case ECONNABORTED:
3037 case ECONNRESET:
3038 case ENOTCONN:
3039 case ESHUTDOWN:
3040 case ECONNREFUSED:
3041 case EHOSTDOWN:
3042 case EHOSTUNREACH:
3043 needrecon = 1;
3044 break;
3045 }
3046 if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
3047 NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
3048 nfs_need_reconnect(nmp);
3049 }
3050
3051 nfs_sndunlock(req);
3052
3053 /*
3054 * Don't log some errors:
3055 * EPIPE errors may be common with servers that drop idle connections.
3056 * EADDRNOTAVAIL may occur on network transitions.
3057 * ENOTCONN may occur under some network conditions.
3058 */
3059 if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
3060 error = 0;
3061 if (error && (error != EINTR) && (error != ERESTART))
3062 log(LOG_INFO, "nfs send error %d for server %s\n", error,
3063 !req->r_nmp ? "<unmounted>" :
3064 vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
3065
3066 if (nfs_is_dead(error, nmp))
3067 error = EIO;
3068
3069 /* prefer request termination error over other errors */
3070 error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
3071 if (error2)
3072 error = error2;
3073
3074 /* only allow the following errors to be returned */
3075 if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
3076 (error != ENXIO) && (error != ETIMEDOUT))
3077 error = 0;
3078 return (error);
3079 }
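/*
 * Per the contract above, callers set R_SENDING (under r_mtx) before calling
 * nfs_send(), and only a nonzero return means the request should be aborted;
 * a zero return can still mean "not sent yet" with R_MUSTRESEND left set for
 * the socket thread to retry.  Sketch of the caller pattern used by the
 * resend code in nfs_mount_sock_thread() earlier in this file:
 */
#if 0 /* illustrative sketch only; not compiled */
	lck_mtx_lock(&req->r_mtx);
	req->r_flags |= R_SENDING;
	lck_mtx_unlock(&req->r_mtx);
	error = nfs_send(req, 0); /* don't wait; the sock thread retries if needed */
	if (error) {
		/* abort: only EINTR/ERESTART/EIO/ENXIO/ETIMEDOUT are returned */
	}
#endif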
3080
3081 /*
3082 * NFS client socket upcalls
3083 *
3084 * Pull RPC replies out of an NFS mount's socket and match them
3085 * up with the pending request.
3086 *
3087 * The datagram code is simple because we always get whole
3088 * messages out of the socket.
3089 *
3090 * The stream code is more involved because we have to parse
3091 * the RPC records out of the stream.
3092 */
3093
3094 /* NFS client UDP socket upcall */
3095 void
3096 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
3097 {
3098 struct nfsmount *nmp = arg;
3099 struct nfs_socket *nso = nmp->nm_nso;
3100 size_t rcvlen;
3101 mbuf_t m;
3102 int error = 0;
3103
3104 if (nmp->nm_sockflags & NMSOCK_CONNECTING)
3105 return;
3106
3107 do {
3108 /* make sure we're on the current socket */
3109 if (!nso || (nso->nso_so != so))
3110 return;
3111
3112 m = NULL;
3113 rcvlen = 1000000;
3114 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
3115 if (m)
3116 nfs_request_match_reply(nmp, m);
3117 } while (m && !error);
3118
3119 if (error && (error != EWOULDBLOCK)) {
3120 /* problems with the socket... mark for reconnection */
3121 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
3122 nfs_need_reconnect(nmp);
3123 }
3124 }
3125
3126 /* NFS client TCP socket upcall */
3127 void
3128 nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
3129 {
3130 struct nfsmount *nmp = arg;
3131 struct nfs_socket *nso = nmp->nm_nso;
3132 struct nfs_rpc_record_state nrrs;
3133 mbuf_t m;
3134 int error = 0;
3135 int recv = 1;
3136
3137 if (nmp->nm_sockflags & NMSOCK_CONNECTING)
3138 return;
3139
3140 /* make sure we're on the current socket */
3141 lck_mtx_lock(&nmp->nm_lock);
3142 nso = nmp->nm_nso;
3143 if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
3144 lck_mtx_unlock(&nmp->nm_lock);
3145 return;
3146 }
3147 lck_mtx_unlock(&nmp->nm_lock);
3148
3149 /* make sure this upcall should be trying to do work */
3150 lck_mtx_lock(&nso->nso_lock);
3151 if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
3152 lck_mtx_unlock(&nso->nso_lock);
3153 return;
3154 }
3155 nso->nso_flags |= NSO_UPCALL;
3156 nrrs = nso->nso_rrs;
3157 lck_mtx_unlock(&nso->nso_lock);
3158
3159 /* loop while we make error-free progress */
3160 while (!error && recv) {
3161 error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
3162 if (m) /* match completed response with request */
3163 nfs_request_match_reply(nmp, m);
3164 }
3165
3166 lck_mtx_lock(&nmp->nm_lock);
3167 if (nmp->nm_nso == nso) {
3168 /* still the same socket, so update socket's RPC parsing state */
3169 lck_mtx_unlock(&nmp->nm_lock);
3170 lck_mtx_lock(&nso->nso_lock);
3171 nso->nso_rrs = nrrs;
3172 nso->nso_flags &= ~NSO_UPCALL;
3173 lck_mtx_unlock(&nso->nso_lock);
3174 if (nmp->nm_sockflags & NMSOCK_DISCONNECTING)
3175 wakeup(&nmp->nm_sockflags);
3176 } else {
3177 lck_mtx_unlock(&nmp->nm_lock);
3178 }
3179 #ifdef NFS_SOCKET_DEBUGGING
3180 if (!recv && (error != EWOULDBLOCK))
3181 NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
3182 #endif
3183 /* note: no error and no data indicates the server closed its end */
3184 if ((error != EWOULDBLOCK) && (error || !recv)) {
3185 /* problems with the socket... mark for reconnection */
3186 NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
3187 nfs_need_reconnect(nmp);
3188 }
3189 }
3190
3191 /*
3192 * "poke" a socket to try to provoke any pending errors
3193 */
3194 void
3195 nfs_sock_poke(struct nfsmount *nmp)
3196 {
3197 struct iovec aio;
3198 struct msghdr msg;
3199 size_t len;
3200 int error = 0;
3201 int dummy;
3202
3203 lck_mtx_lock(&nmp->nm_lock);
3204 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
3205 !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
3206 lck_mtx_unlock(&nmp->nm_lock);
3207 return;
3208 }
3209 lck_mtx_unlock(&nmp->nm_lock);
3210 aio.iov_base = &dummy;
3211 aio.iov_len = 0;
3212 len = 0;
3213 bzero(&msg, sizeof(msg));
3214 msg.msg_iov = &aio;
3215 msg.msg_iovlen = 1;
3216 error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
3217 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
3218 nfs_is_dead(error, nmp);
3219 }
3220
3221 /*
3222 * Match an RPC reply with the corresponding request
3223 */
3224 void
3225 nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
3226 {
3227 struct nfsreq *req;
3228 struct nfsm_chain nmrep;
3229 u_int32_t reply = 0, rxid = 0;
3230 int error = 0, asyncioq, t1;
3231
3232 /* Get the xid and check that it is an rpc reply */
3233 nfsm_chain_dissect_init(error, &nmrep, mrep);
3234 nfsm_chain_get_32(error, &nmrep, rxid);
3235 nfsm_chain_get_32(error, &nmrep, reply);
3236 if (error || (reply != RPC_REPLY)) {
3237 OSAddAtomic64(1, &nfsstats.rpcinvalid);
3238 mbuf_freem(mrep);
3239 return;
3240 }
3241
3242 /*
3243 * Loop through the request list to match up the reply.
3244 * If no match is found, just drop it.
3245 */
3246 lck_mtx_lock(nfs_request_mutex);
3247 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
3248 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
3249 continue;
3250 /* looks like we have it, grab lock and double check */
3251 lck_mtx_lock(&req->r_mtx);
3252 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
3253 lck_mtx_unlock(&req->r_mtx);
3254 continue;
3255 }
3256 /* Found it.. */
3257 req->r_nmrep = nmrep;
3258 lck_mtx_lock(&nmp->nm_lock);
3259 if (nmp->nm_sotype == SOCK_DGRAM) {
3260 /*
3261 * Update congestion window.
3262 * Do the additive increase of one rpc/rtt.
3263 */
3264 FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3265 if (nmp->nm_cwnd <= nmp->nm_sent) {
3266 nmp->nm_cwnd +=
3267 ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
3268 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
3269 if (nmp->nm_cwnd > NFS_MAXCWND)
3270 nmp->nm_cwnd = NFS_MAXCWND;
3271 }
3272 if (req->r_flags & R_CWND) {
3273 nmp->nm_sent -= NFS_CWNDSCALE;
3274 req->r_flags &= ~R_CWND;
3275 }
3276 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3277 /* congestion window is open, poke the cwnd queue */
3278 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3279 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3280 req2->r_cchain.tqe_next = NFSREQNOLIST;
3281 wakeup(req2);
3282 }
3283 }
3284 /*
3285 * Update rtt using a gain of 0.125 on the mean
3286 * and a gain of 0.25 on the deviation.
3287 */
3288 if (req->r_flags & R_TIMING) {
3289 /*
3290 * Since the timer resolution of
3291 * NFS_HZ is so coarse, it can often
3292 * result in r_rtt == 0. Since
3293 * r_rtt == N means that the actual
3294 * rtt is between N+dt and N+2-dt ticks,
3295 * add 1.
3296 */
3297 if (proct[req->r_procnum] == 0)
3298 panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
3299 t1 = req->r_rtt + 1;
3300 t1 -= (NFS_SRTT(req) >> 3);
3301 NFS_SRTT(req) += t1;
3302 if (t1 < 0)
3303 t1 = -t1;
3304 t1 -= (NFS_SDRTT(req) >> 2);
3305 NFS_SDRTT(req) += t1;
3306 }
3307 nmp->nm_timeouts = 0;
3308 lck_mtx_unlock(&nmp->nm_lock);
3309 /* signal anyone waiting on this request */
3310 wakeup(req);
3311 asyncioq = (req->r_callback.rcb_func != NULL);
3312 if (nfs_request_using_gss(req))
3313 nfs_gss_clnt_rpcdone(req);
3314 lck_mtx_unlock(&req->r_mtx);
3315 lck_mtx_unlock(nfs_request_mutex);
3316 /* if it's an async RPC with a callback, queue it up */
3317 if (asyncioq)
3318 nfs_asyncio_finish(req);
3319 break;
3320 }
3321
3322 if (!req) {
3323 /* not matched to a request, so drop it. */
3324 lck_mtx_unlock(nfs_request_mutex);
3325 OSAddAtomic64(1, &nfsstats.rpcunexpected);
3326 mbuf_freem(mrep);
3327 }
3328 }
3329
3330 /*
3331 * Wait for the reply for a given request...
3332 * ...potentially resending the request if necessary.
3333 */
3334 int
3335 nfs_wait_reply(struct nfsreq *req)
3336 {
3337 struct timespec ts = { 2, 0 };
3338 int error = 0, slpflag, first = 1;
3339
3340 if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
3341 slpflag = PCATCH;
3342 else
3343 slpflag = 0;
3344
3345 lck_mtx_lock(&req->r_mtx);
3346 while (!req->r_nmrep.nmc_mhead) {
3347 if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
3348 break;
3349 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
3350 break;
3351 /* check if we need to resend */
3352 if (req->r_flags & R_MUSTRESEND) {
3353 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
3354 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
3355 req->r_flags |= R_SENDING;
3356 lck_mtx_unlock(&req->r_mtx);
3357 if (nfs_request_using_gss(req)) {
3358 /*
3359 * It's an RPCSEC_GSS request.
3360 * Can't just resend the original request
3361 * without bumping the cred sequence number.
3362 * Go back and re-build the request.
3363 */
3364 lck_mtx_lock(&req->r_mtx);
3365 req->r_flags &= ~R_SENDING;
3366 lck_mtx_unlock(&req->r_mtx);
3367 return (EAGAIN);
3368 }
3369 error = nfs_send(req, 1);
3370 lck_mtx_lock(&req->r_mtx);
3371 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
3372 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
3373 if (error)
3374 break;
3375 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
3376 break;
3377 }
3378 /* need to poll if we're P_NOREMOTEHANG */
3379 if (nfs_noremotehang(req->r_thread))
3380 ts.tv_sec = 1;
3381 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
3382 first = slpflag = 0;
3383 }
3384 lck_mtx_unlock(&req->r_mtx);
3385
3386 return (error);
3387 }
3388
3389 /*
3390 * An NFS request goes something like this:
3391 * (nb: always frees up mreq mbuf list)
3392 * nfs_request_create()
3393 * - allocates a request struct if one is not provided
3394 * - initial fill-in of the request struct
3395 * nfs_request_add_header()
3396 * - add the RPC header
3397 * nfs_request_send()
3398 * - link it into list
3399 * - call nfs_send() for first transmit
3400 * nfs_request_wait()
3401 * - call nfs_wait_reply() to wait for the reply
3402 * nfs_request_finish()
3403 * - break down rpc header and return with error or nfs reply
3404 * pointed to by nmrep.
3405 * nfs_request_rele()
3406 * nfs_request_destroy()
3407 * - clean up the request struct
3408 * - free the request struct if it was allocated by nfs_request_create()
3409 */
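/*
 * An illustrative sketch of the synchronous path (error checks
 * elided); nfs_request2() below is the real thing:
 *
 *	struct nfsreq rq, *req = &rq;
 *	error = nfs_request_create(np, mp, &nmreq, procnum, thd, cred, &req);
 *	do {
 *		req->r_flags &= ~R_RESTART;
 *		error = nfs_request_add_header(req);
 *		error = nfs_request_send(req, 1);
 *		nfs_request_wait(req);
 *		error = nfs_request_finish(req, &nmrep, &status);
 *	} while (req->r_flags & R_RESTART);
 *	nfs_request_rele(req);
 */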
3410
3411 /*
3412 * Set up an NFS request struct (allocating if no request passed in).
3413 */
3414 int
3415 nfs_request_create(
3416 nfsnode_t np,
3417 mount_t mp, /* used only if !np */
3418 struct nfsm_chain *nmrest,
3419 int procnum,
3420 thread_t thd,
3421 kauth_cred_t cred,
3422 struct nfsreq **reqp)
3423 {
3424 struct nfsreq *req, *newreq = NULL;
3425 struct nfsmount *nmp;
3426
3427 req = *reqp;
3428 if (!req) {
3429 /* allocate a new NFS request structure */
3430 MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
3431 if (!newreq) {
3432 mbuf_freem(nmrest->nmc_mhead);
3433 nmrest->nmc_mhead = NULL;
3434 return (ENOMEM);
3435 }
3436 req = newreq;
3437 }
3438
3439 bzero(req, sizeof(*req));
3440 if (req == newreq)
3441 req->r_flags = R_ALLOCATED;
3442
3443 nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
3444 if (!nmp) {
3445 if (newreq)
3446 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
3447 return (ENXIO);
3448 }
3449 lck_mtx_lock(&nmp->nm_lock);
3450 if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
3451 (NFSSTA_FORCE|NFSSTA_TIMEO)) {
3452 lck_mtx_unlock(&nmp->nm_lock);
3453 mbuf_freem(nmrest->nmc_mhead);
3454 nmrest->nmc_mhead = NULL;
3455 if (newreq)
3456 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
3457 return (ENXIO);
3458 }
3459
3460 if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
3461 OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
3462 if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
3463 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
3464
3465 lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
3466 req->r_nmp = nmp;
3467 req->r_np = np;
3468 req->r_thread = thd;
3469 if (!thd)
3470 req->r_flags |= R_NOINTR;
3471 if (IS_VALID_CRED(cred)) {
3472 kauth_cred_ref(cred);
3473 req->r_cred = cred;
3474 }
3475 req->r_procnum = procnum;
3476 if (proct[procnum] > 0)
3477 req->r_flags |= R_TIMING;
3478 req->r_nmrep.nmc_mhead = NULL;
3479 SLIST_INIT(&req->r_gss_seqlist);
3480 req->r_achain.tqe_next = NFSREQNOLIST;
3481 req->r_rchain.tqe_next = NFSREQNOLIST;
3482 req->r_cchain.tqe_next = NFSREQNOLIST;
3483
3484 /* set auth flavor to use for request */
3485 if (!req->r_cred)
3486 req->r_auth = RPCAUTH_NONE;
3487 else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
3488 req->r_auth = req->r_np->n_auth;
3489 else
3490 req->r_auth = nmp->nm_auth;
3491
3492 lck_mtx_unlock(&nmp->nm_lock);
3493
3494 /* move the request mbuf chain to the nfsreq */
3495 req->r_mrest = nmrest->nmc_mhead;
3496 nmrest->nmc_mhead = NULL;
3497
3498 req->r_flags |= R_INITTED;
3499 req->r_refs = 1;
3500 if (newreq)
3501 *reqp = req;
3502 return (0);
3503 }
3504
3505 /*
3506 * Clean up and free an NFS request structure.
3507 */
3508 void
3509 nfs_request_destroy(struct nfsreq *req)
3510 {
3511 struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3512 struct gss_seq *gsp, *ngsp;
3513 struct timespec ts = { 1, 0 };
3514 int clearjbtimeo = 0;
3515
3516 if (!req || !(req->r_flags & R_INITTED))
3517 return;
3518 req->r_flags &= ~R_INITTED;
3519 if (req->r_lflags & RL_QUEUED)
3520 nfs_reqdequeue(req);
3521 if (req->r_achain.tqe_next != NFSREQNOLIST) {
3522 /* still on an async I/O queue? */
3523 lck_mtx_lock(nfsiod_mutex);
3524 if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
3525 TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
3526 req->r_achain.tqe_next = NFSREQNOLIST;
3527 }
3528 lck_mtx_unlock(nfsiod_mutex);
3529 }
3530 lck_mtx_lock(&req->r_mtx);
3531 if (nmp) {
3532 lck_mtx_lock(&nmp->nm_lock);
3533 if (req->r_flags & R_CWND) {
3534 /* Decrement the outstanding request count. */
3535 req->r_flags &= ~R_CWND;
3536 nmp->nm_sent -= NFS_CWNDSCALE;
3537 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3538 /* congestion window is open, poke the cwnd queue */
3539 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3540 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3541 req2->r_cchain.tqe_next = NFSREQNOLIST;
3542 wakeup(req2);
3543 }
3544 }
3545 if (req->r_rchain.tqe_next != NFSREQNOLIST) {
3546 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
3547 req->r_rchain.tqe_next = NFSREQNOLIST;
3548 if (req->r_flags & R_RESENDQ)
3549 req->r_flags &= ~R_RESENDQ;
3550 }
3551 if (req->r_cchain.tqe_next != NFSREQNOLIST) {
3552 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
3553 req->r_cchain.tqe_next = NFSREQNOLIST;
3554 }
3555 if (req->r_flags & R_JBTPRINTFMSG) {
3556 req->r_flags &= ~R_JBTPRINTFMSG;
3557 nmp->nm_jbreqs--;
3558 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
3559 }
3560 lck_mtx_unlock(&nmp->nm_lock);
3561 }
3562 while (req->r_flags & R_RESENDQ)
3563 msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
3564 lck_mtx_unlock(&req->r_mtx);
3565 if (clearjbtimeo)
3566 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
3567 if (req->r_mhead)
3568 mbuf_freem(req->r_mhead);
3569 else if (req->r_mrest)
3570 mbuf_freem(req->r_mrest);
3571 if (req->r_nmrep.nmc_mhead)
3572 mbuf_freem(req->r_nmrep.nmc_mhead);
3573 if (IS_VALID_CRED(req->r_cred))
3574 kauth_cred_unref(&req->r_cred);
3575 if (nfs_request_using_gss(req))
3576 nfs_gss_clnt_rpcdone(req);
3577 SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
3578 FREE(gsp, M_TEMP);
3579 if (req->r_gss_ctx)
3580 nfs_gss_clnt_ctx_unref(req);
3581 if (req->r_wrongsec)
3582 FREE(req->r_wrongsec, M_TEMP);
3583
3584 lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
3585 if (req->r_flags & R_ALLOCATED)
3586 FREE_ZONE(req, sizeof(*req), M_NFSREQ);
3587 }
3588
3589 void
3590 nfs_request_ref(struct nfsreq *req, int locked)
3591 {
3592 if (!locked)
3593 lck_mtx_lock(&req->r_mtx);
3594 if (req->r_refs <= 0)
3595 panic("nfsreq reference error");
3596 req->r_refs++;
3597 if (!locked)
3598 lck_mtx_unlock(&req->r_mtx);
3599 }
3600
3601 void
3602 nfs_request_rele(struct nfsreq *req)
3603 {
3604 int destroy;
3605
3606 lck_mtx_lock(&req->r_mtx);
3607 if (req->r_refs <= 0)
3608 panic("nfsreq reference underflow");
3609 req->r_refs--;
3610 destroy = (req->r_refs == 0);
3611 lck_mtx_unlock(&req->r_mtx);
3612 if (destroy)
3613 nfs_request_destroy(req);
3614 }
3615
3616
3617 /*
3618 * Add an (updated) RPC header with authorization to an NFS request.
3619 */
3620 int
3621 nfs_request_add_header(struct nfsreq *req)
3622 {
3623 struct nfsmount *nmp;
3624 int error = 0;
3625 mbuf_t m;
3626
3627 /* free up any previous header */
3628 if ((m = req->r_mhead)) {
3629 while (m && (m != req->r_mrest))
3630 m = mbuf_free(m);
3631 req->r_mhead = NULL;
3632 }
3633
3634 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3635 if (!nmp)
3636 return (ENXIO);
3637
3638 error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
3639 if (error)
3640 return (error);
3641
3642 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
3643 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3644 if (!nmp)
3645 return (ENXIO);
3646 lck_mtx_lock(&nmp->nm_lock);
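/*
 * Soft mounts give up after the mount's configured retry count;
 * hard mounts get a count past the NFS_MAXREXMIT clip so the
 * request timer never terminates them on its own.
 */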
3647 if (NMFLAG(nmp, SOFT))
3648 req->r_retry = nmp->nm_retry;
3649 else
3650 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
3651 lck_mtx_unlock(&nmp->nm_lock);
3652
3653 return (error);
3654 }
3655
3656
3657 /*
3658 * Queue an NFS request up and send it out.
3659 */
3660 int
3661 nfs_request_send(struct nfsreq *req, int wait)
3662 {
3663 struct nfsmount *nmp;
3664 struct timeval now;
3665
3666 lck_mtx_lock(&req->r_mtx);
3667 req->r_flags |= R_SENDING;
3668 lck_mtx_unlock(&req->r_mtx);
3669
3670 lck_mtx_lock(nfs_request_mutex);
3671
3672 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3673 if (!nmp) {
3674 lck_mtx_unlock(nfs_request_mutex);
3675 return (ENXIO);
3676 }
3677
3678 microuptime(&now);
3679 if (!req->r_start) {
3680 req->r_start = now.tv_sec;
3681 req->r_lastmsg = now.tv_sec -
3682 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
3683 }
3684
3685 OSAddAtomic64(1, &nfsstats.rpcrequests);
3686
3687 /*
3688 * Chain request into list of outstanding requests. Be sure
3689 * to put it LAST so the timer finds the oldest requests first.
3690 * Make sure that the request queue timer is running
3691 * to check for possible request timeout.
3692 */
3693 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
3694 req->r_lflags |= RL_QUEUED;
3695 if (!nfs_request_timer_on) {
3696 nfs_request_timer_on = 1;
3697 nfs_interval_timer_start(nfs_request_timer_call,
3698 NFS_REQUESTDELAY);
3699 }
3700 lck_mtx_unlock(nfs_request_mutex);
3701
3702 /* Send the request... */
3703 return (nfs_send(req, wait));
3704 }
3705
3706 /*
3707 * Call nfs_wait_reply() to wait for the reply.
3708 */
3709 void
3710 nfs_request_wait(struct nfsreq *req)
3711 {
3712 req->r_error = nfs_wait_reply(req);
3713 }
3714
3715 /*
3716 * Finish up an NFS request by dequeueing it and
3717 * doing the initial NFS request reply processing.
3718 */
3719 int
3720 nfs_request_finish(
3721 struct nfsreq *req,
3722 struct nfsm_chain *nmrepp,
3723 int *status)
3724 {
3725 struct nfsmount *nmp;
3726 mbuf_t mrep;
3727 int verf_type = 0;
3728 uint32_t verf_len = 0;
3729 uint32_t reply_status = 0;
3730 uint32_t rejected_status = 0;
3731 uint32_t auth_status = 0;
3732 uint32_t accepted_status = 0;
3733 struct nfsm_chain nmrep;
3734 int error, clearjbtimeo;
3735
3736 error = req->r_error;
3737
3738 if (nmrepp)
3739 nmrepp->nmc_mhead = NULL;
3740
3741 /* RPC done, unlink the request. */
3742 nfs_reqdequeue(req);
3743
3744 mrep = req->r_nmrep.nmc_mhead;
3745
3746 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3747
3748 if ((req->r_flags & R_CWND) && nmp) {
3749 /*
3750 * Decrement the outstanding request count.
3751 */
3752 req->r_flags &= ~R_CWND;
3753 lck_mtx_lock(&nmp->nm_lock);
3754 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3755 nmp->nm_sent -= NFS_CWNDSCALE;
3756 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3757 /* congestion window is open, poke the cwnd queue */
3758 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3759 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3760 req2->r_cchain.tqe_next = NFSREQNOLIST;
3761 wakeup(req2);
3762 }
3763 lck_mtx_unlock(&nmp->nm_lock);
3764 }
3765
3766 if (nfs_request_using_gss(req)) {
3767 /*
3768 * If the request used an RPCSEC_GSS credential
3769 * then reset its sequence number bit in the
3770 * request window.
3771 */
3772 nfs_gss_clnt_rpcdone(req);
3773
3774 /*
3775 * If we need to re-send, go back and re-build the
3776 * request based on a new sequence number.
3777 * Note that we're using the original XID.
3778 */
3779 if (error == EAGAIN) {
3780 req->r_error = 0;
3781 if (mrep)
3782 mbuf_freem(mrep);
3783 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
3784 req->r_nmrep.nmc_mhead = NULL;
3785 req->r_flags |= R_RESTART;
3786 if (error == ENEEDAUTH) {
3787 req->r_xid = 0; // get a new XID
3788 error = 0;
3789 }
3790 goto nfsmout;
3791 }
3792 }
3793
3794 /*
3795 * If there was a successful reply, make sure to mark the mount as up.
3796 * If a tprintf message was given (or if this is a timed-out soft mount)
3797 * then post a tprintf message indicating the server is alive again.
3798 */
3799 if (!error) {
3800 if ((req->r_flags & R_TPRINTFMSG) ||
3801 (nmp && NMFLAG(nmp, SOFT) &&
3802 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
3803 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
3804 else
3805 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
3806 }
3807 if (!error && !nmp)
3808 error = ENXIO;
3809 nfsmout_if(error);
3810
3811 /*
3812 * break down the RPC header and check if ok
3813 */
3814 nmrep = req->r_nmrep;
3815 nfsm_chain_get_32(error, &nmrep, reply_status);
3816 nfsmout_if(error);
3817 if (reply_status == RPC_MSGDENIED) {
3818 nfsm_chain_get_32(error, &nmrep, rejected_status);
3819 nfsmout_if(error);
3820 if (rejected_status == RPC_MISMATCH) {
3821 error = ENOTSUP;
3822 goto nfsmout;
3823 }
3824 nfsm_chain_get_32(error, &nmrep, auth_status);
3825 nfsmout_if(error);
3826 switch (auth_status) {
3827 case RPCSEC_GSS_CREDPROBLEM:
3828 case RPCSEC_GSS_CTXPROBLEM:
3829 /*
3830 * An RPCSEC_GSS cred or context problem.
3831 * We can't use it anymore.
3832 * Restore the args, renew the context
3833 * and set up for a resend.
3834 */
3835 error = nfs_gss_clnt_args_restore(req);
3836 if (error && error != ENEEDAUTH)
3837 break;
3838
3839 if (!error) {
3840 error = nfs_gss_clnt_ctx_renew(req);
3841 if (error)
3842 break;
3843 }
3844 mbuf_freem(mrep);
3845 req->r_nmrep.nmc_mhead = NULL;
3846 req->r_xid = 0; // get a new XID
3847 req->r_flags |= R_RESTART;
3848 goto nfsmout;
3849 default:
3850 error = EACCES;
3851 break;
3852 }
3853 goto nfsmout;
3854 }
3855
3856 /* Now check the verifier */
3857 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
3858 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
3859 nfsmout_if(error);
3860
3861 switch (req->r_auth) {
3862 case RPCAUTH_NONE:
3863 case RPCAUTH_SYS:
3864 /* Any AUTH_SYS verifier is ignored */
3865 if (verf_len > 0)
3866 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
3867 nfsm_chain_get_32(error, &nmrep, accepted_status);
3868 break;
3869 case RPCAUTH_KRB5:
3870 case RPCAUTH_KRB5I:
3871 case RPCAUTH_KRB5P:
3872 error = nfs_gss_clnt_verf_get(req, &nmrep,
3873 verf_type, verf_len, &accepted_status);
3874 break;
3875 }
3876 nfsmout_if(error);
3877
3878 switch (accepted_status) {
3879 case RPC_SUCCESS:
3880 if (req->r_procnum == NFSPROC_NULL) {
3881 /*
3882 * The NFS null procedure is unique
3883 * in that it returns no NFS status.
3884 */
3885 *status = NFS_OK;
3886 } else {
3887 nfsm_chain_get_32(error, &nmrep, *status);
3888 nfsmout_if(error);
3889 }
3890
3891 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
3892 /*
3893 * It's a JUKEBOX error - delay and try again
3894 */
3895 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
3896
3897 mbuf_freem(mrep);
3898 req->r_nmrep.nmc_mhead = NULL;
3899 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
3900 /* we're not yet completely mounted and */
3901 /* we can't complete an RPC, so we fail */
3902 OSAddAtomic64(1, &nfsstats.rpctimeouts);
3903 nfs_softterm(req);
3904 error = req->r_error;
3905 goto nfsmout;
3906 }
3907 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
3908 if (req->r_delay > 30)
3909 req->r_delay = 30;
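/*
 * exponential backoff: NFS_TRYLATERDEL seconds initially, doubled
 * on each NFSERR_TRYLATER reply, capped at 30 seconds
 */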
3910 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
3911 if (!(req->r_flags & R_JBTPRINTFMSG)) {
3912 req->r_flags |= R_JBTPRINTFMSG;
3913 lck_mtx_lock(&nmp->nm_lock);
3914 nmp->nm_jbreqs++;
3915 lck_mtx_unlock(&nmp->nm_lock);
3916 }
3917 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
3918 "resource temporarily unavailable (jukebox)");
3919 }
3920 if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) {
3921 /* for soft mounts, just give up after a short while */
3922 OSAddAtomic64(1, &nfsstats.rpctimeouts);
3923 nfs_softterm(req);
3924 error = req->r_error;
3925 goto nfsmout;
3926 }
3927 delay = req->r_delay;
3928 if (req->r_callback.rcb_func) {
3929 struct timeval now;
3930 microuptime(&now);
3931 req->r_resendtime = now.tv_sec + delay;
3932 } else {
3933 do {
3934 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
3935 goto nfsmout;
3936 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
3937 slpflag = 0;
3938 } while (--delay > 0);
3939 }
3940 req->r_xid = 0; // get a new XID
3941 req->r_flags |= R_RESTART;
3942 req->r_start = 0;
3943 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
3944 return (0);
3945 }
3946
3947 if (req->r_flags & R_JBTPRINTFMSG) {
3948 req->r_flags &= ~R_JBTPRINTFMSG;
3949 lck_mtx_lock(&nmp->nm_lock);
3950 nmp->nm_jbreqs--;
3951 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
3952 lck_mtx_unlock(&nmp->nm_lock);
3953 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
3954 }
3955
3956 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
3957 /*
3958 * Hmmm... we need to try a different security flavor.
3959 * The first time a request hits this, we will allocate an array
3960 * to track flavors to try. We fill the array with the mount's
3961 * preferred flavors or the server's preferred flavors or just the
3962 * flavors we support.
3963 */
3964 uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
3965 int srvcount, i, j;
3966
3967 /* Call SECINFO to try to get list of flavors from server. */
3968 srvcount = NX_MAX_SEC_FLAVORS;
3969 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);
3970
3971 if (!req->r_wrongsec) {
3972 /* first time... set up flavor array */
3973 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK);
3974 if (!req->r_wrongsec) {
3975 error = EACCES;
3976 goto nfsmout;
3977 }
3978 i=0;
3979 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
3980 for(; i < nmp->nm_sec.count; i++)
3981 req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
3982 } else if (srvcount) { /* otherwise use the server's list of flavors */
3983 for(; i < srvcount; i++)
3984 req->r_wrongsec[i] = srvflavors[i];
3985 } else { /* otherwise, just try the flavors we support. */
3986 req->r_wrongsec[i++] = RPCAUTH_KRB5P;
3987 req->r_wrongsec[i++] = RPCAUTH_KRB5I;
3988 req->r_wrongsec[i++] = RPCAUTH_KRB5;
3989 req->r_wrongsec[i++] = RPCAUTH_SYS;
3990 req->r_wrongsec[i++] = RPCAUTH_NONE;
3991 }
3992 for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */
3993 req->r_wrongsec[i] = RPCAUTH_INVALID;
3994 }
3995
3996 /* clear the current flavor from the list */
3997 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
3998 if (req->r_wrongsec[i] == req->r_auth)
3999 req->r_wrongsec[i] = RPCAUTH_INVALID;
4000
4001 /* find the next flavor to try */
4002 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
4003 if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
4004 if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) ||
4005 (req->r_wrongsec[i] == RPCAUTH_KRB5I) ||
4006 (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx &&
4007 (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) {
4008 /* don't bother trying Kerberos if we've already got a fallback context */
4009 req->r_wrongsec[i] = RPCAUTH_INVALID;
4010 continue;
4011 }
4012 if (!srvcount) /* no server list, just try it */
4013 break;
4014 /* check that it's in the server's list */
4015 for(j=0; j < srvcount; j++)
4016 if (req->r_wrongsec[i] == srvflavors[j])
4017 break;
4018 if (j < srvcount) /* found */
4019 break;
4020 /* not found in server list */
4021 req->r_wrongsec[i] = RPCAUTH_INVALID;
4022 }
4023 if (i == NX_MAX_SEC_FLAVORS) {
4024 /* nothing left to try! */
4025 error = EACCES;
4026 goto nfsmout;
4027 }
4028
4029 /* retry with the next auth flavor */
4030 req->r_auth = req->r_wrongsec[i];
4031 req->r_xid = 0; // get a new XID
4032 req->r_flags |= R_RESTART;
4033 req->r_start = 0;
4034 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
4035 return (0);
4036 }
4037 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
4038 /*
4039 * We renegotiated security for this request; so update the
4040 * default security flavor for the associated node.
4041 */
4042 if (req->r_np)
4043 req->r_np->n_auth = req->r_auth;
4044 }
4045
4046 if (*status == NFS_OK) {
4047 /*
4048 * Successful NFS request
4049 */
4050 *nmrepp = nmrep;
4051 req->r_nmrep.nmc_mhead = NULL;
4052 break;
4053 }
4054 /* Got an NFS error of some kind */
4055
4056 /*
4057 * If the File Handle was stale, invalidate the
4058 * lookup cache, just in case.
4059 */
4060 if ((*status == ESTALE) && req->r_np) {
4061 cache_purge(NFSTOV(req->r_np));
4062 /* if monitored, also send delete event */
4063 if (vnode_ismonitored(NFSTOV(req->r_np)))
4064 nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
4065 }
4066 if (nmp->nm_vers == NFS_VER2)
4067 mbuf_freem(mrep);
4068 else
4069 *nmrepp = nmrep;
4070 req->r_nmrep.nmc_mhead = NULL;
4071 error = 0;
4072 break;
4073 case RPC_PROGUNAVAIL:
4074 error = EPROGUNAVAIL;
4075 break;
4076 case RPC_PROGMISMATCH:
4077 error = ERPCMISMATCH;
4078 break;
4079 case RPC_PROCUNAVAIL:
4080 error = EPROCUNAVAIL;
4081 break;
4082 case RPC_GARBAGE:
4083 error = EBADRPC;
4084 break;
4085 case RPC_SYSTEM_ERR:
4086 default:
4087 error = EIO;
4088 break;
4089 }
4090 nfsmout:
4091 if (req->r_flags & R_JBTPRINTFMSG) {
4092 req->r_flags &= ~R_JBTPRINTFMSG;
4093 lck_mtx_lock(&nmp->nm_lock);
4094 nmp->nm_jbreqs--;
4095 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4096 lck_mtx_unlock(&nmp->nm_lock);
4097 if (clearjbtimeo)
4098 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
4099 }
4100 FSDBG(273, R_XID32(req->r_xid), nmp, req,
4101 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
4102 return (error);
4103 }
4104
4105 /*
4106 * NFS request using a GSS/Kerberos security flavor?
4107 */
4108 int
4109 nfs_request_using_gss(struct nfsreq *req)
4110 {
4111 if (!req->r_gss_ctx)
4112 return (0);
4113 switch (req->r_auth) {
4114 case RPCAUTH_KRB5:
4115 case RPCAUTH_KRB5I:
4116 case RPCAUTH_KRB5P:
4117 return (1);
4118 }
4119 return (0);
4120 }
4121
4122 /*
4123 * Perform an NFS request synchronously.
4124 */
4125
4126 int
4127 nfs_request(
4128 nfsnode_t np,
4129 mount_t mp, /* used only if !np */
4130 struct nfsm_chain *nmrest,
4131 int procnum,
4132 vfs_context_t ctx,
4133 struct nfsreq_secinfo_args *si,
4134 struct nfsm_chain *nmrepp,
4135 u_int64_t *xidp,
4136 int *status)
4137 {
4138 return nfs_request2(np, mp, nmrest, procnum,
4139 vfs_context_thread(ctx), vfs_context_ucred(ctx),
4140 si, 0, nmrepp, xidp, status);
4141 }
4142
4143 int
4144 nfs_request2(
4145 nfsnode_t np,
4146 mount_t mp, /* used only if !np */
4147 struct nfsm_chain *nmrest,
4148 int procnum,
4149 thread_t thd,
4150 kauth_cred_t cred,
4151 struct nfsreq_secinfo_args *si,
4152 int flags,
4153 struct nfsm_chain *nmrepp,
4154 u_int64_t *xidp,
4155 int *status)
4156 {
4157 struct nfsreq rq, *req = &rq;
4158 int error;
4159
4160 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
4161 return (error);
4162 req->r_flags |= (flags & R_OPTMASK);
4163 if (si)
4164 req->r_secinfo = *si;
4165
4166 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
4167 do {
4168 req->r_error = 0;
4169 req->r_flags &= ~R_RESTART;
4170 if ((error = nfs_request_add_header(req)))
4171 break;
4172 if (xidp)
4173 *xidp = req->r_xid;
4174 if ((error = nfs_request_send(req, 1)))
4175 break;
4176 nfs_request_wait(req);
4177 if ((error = nfs_request_finish(req, nmrepp, status)))
4178 break;
4179 } while (req->r_flags & R_RESTART);
4180
4181 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
4182 nfs_request_rele(req);
4183 return (error);
4184 }
4185
4186
4187 /*
4188 * Set up a new null proc request to exchange GSS context tokens with the
4189 * server. Associate the context that we are setting up with the request that we
4190 * are sending.
4191 */
4192
4193 int
4194 nfs_request_gss(
4195 mount_t mp,
4196 struct nfsm_chain *nmrest,
4197 thread_t thd,
4198 kauth_cred_t cred,
4199 int flags,
4200 struct nfs_gss_clnt_ctx *cp, /* Set to gss context to renew or setup */
4201 struct nfsm_chain *nmrepp,
4202 int *status)
4203 {
4204 struct nfsreq rq, *req = &rq;
4205 int error;
4206
4207 if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
4208 return (error);
4209 req->r_flags |= (flags & R_OPTMASK);
4210
4211 if (cp == NULL) {
4212 printf("nfs_request_gss request has no context\n");
4213 nfs_request_rele(req);
4214 return (NFSERR_EAUTH);
4215 }
4216 nfs_gss_clnt_ctx_ref(req, cp);
4217
4218 FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
4219 do {
4220 req->r_error = 0;
4221 req->r_flags &= ~R_RESTART;
4222 if ((error = nfs_request_add_header(req)))
4223 break;
4224
4225 if ((error = nfs_request_send(req, 1)))
4226 break;
4227 nfs_request_wait(req);
4228 if ((error = nfs_request_finish(req, nmrepp, status)))
4229 break;
4230 } while (req->r_flags & R_RESTART);
4231
4232 FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
4233 nfs_request_rele(req);
4234 return (error);
4235 }
4236
4237 /*
4238 * Create and start an asynchronous NFS request.
4239 */
4240 int
4241 nfs_request_async(
4242 nfsnode_t np,
4243 mount_t mp, /* used only if !np */
4244 struct nfsm_chain *nmrest,
4245 int procnum,
4246 thread_t thd,
4247 kauth_cred_t cred,
4248 struct nfsreq_secinfo_args *si,
4249 int flags,
4250 struct nfsreq_cbinfo *cb,
4251 struct nfsreq **reqp)
4252 {
4253 struct nfsreq *req;
4254 struct nfsmount *nmp;
4255 int error, sent;
4256
4257 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
4258 req = *reqp;
4259 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
4260 if (error)
4261 return (error);
4262 req->r_flags |= (flags & R_OPTMASK);
4263 req->r_flags |= R_ASYNC;
4264 if (si)
4265 req->r_secinfo = *si;
4266 if (cb)
4267 req->r_callback = *cb;
4268 error = nfs_request_add_header(req);
4269 if (!error) {
4270 req->r_flags |= R_WAITSENT;
4271 if (req->r_callback.rcb_func)
4272 nfs_request_ref(req, 0);
4273 error = nfs_request_send(req, 1);
4274 lck_mtx_lock(&req->r_mtx);
4275 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
4276 /* make sure to wait until this async I/O request gets sent */
4277 int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
4278 struct timespec ts = { 2, 0 };
4279 while (!(req->r_flags & R_SENT)) {
4280 if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) {
4281 lck_mtx_lock(&nmp->nm_lock);
4282 if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
4283 /*
4284 * It's not going to get off the resend queue if we're in recovery.
4285 * So, just take it off ourselves. We could be holding mount state
4286 * busy and thus holding up the start of recovery.
4287 */
4288 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
4289 req->r_rchain.tqe_next = NFSREQNOLIST;
4290 if (req->r_flags & R_RESENDQ)
4291 req->r_flags &= ~R_RESENDQ;
4292 lck_mtx_unlock(&nmp->nm_lock);
4293 req->r_flags |= R_SENDING;
4294 lck_mtx_unlock(&req->r_mtx);
4295 error = nfs_send(req, 1);
4296 lck_mtx_lock(&req->r_mtx);
4297 if (error)
4298 break;
4299 continue;
4300 }
4301 lck_mtx_unlock(&nmp->nm_lock);
4302 }
4303 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
4304 break;
4305 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
4306 slpflag = 0;
4307 }
4308 }
4309 sent = req->r_flags & R_SENT;
4310 lck_mtx_unlock(&req->r_mtx);
4311 if (error && req->r_callback.rcb_func && !sent)
4312 nfs_request_rele(req);
4313 }
4314 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
4315 if (error || req->r_callback.rcb_func)
4316 nfs_request_rele(req);
4317 return (error);
4318 }
4319
4320 /*
4321 * Wait for and finish an asynchronous NFS request.
4322 */
4323 int
4324 nfs_request_async_finish(
4325 struct nfsreq *req,
4326 struct nfsm_chain *nmrepp,
4327 u_int64_t *xidp,
4328 int *status)
4329 {
4330 int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
4331 struct nfsmount *nmp;
4332
4333 lck_mtx_lock(&req->r_mtx);
4334 if (!asyncio)
4335 req->r_flags |= R_ASYNCWAIT;
4336 while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */
4337 struct timespec ts = { 2, 0 };
4338 if ((nmp = req->r_nmp)) {
4339 lck_mtx_lock(&nmp->nm_lock);
4340 if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
4341 /*
4342 * It's not going to get off the resend queue if we're in recovery.
4343 * So, just take it off ourselves. We could be holding mount state
4344 * busy and thus holding up the start of recovery.
4345 */
4346 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
4347 req->r_rchain.tqe_next = NFSREQNOLIST;
4348 if (req->r_flags & R_RESENDQ)
4349 req->r_flags &= ~R_RESENDQ;
4350 lck_mtx_unlock(&nmp->nm_lock);
4351 break;
4352 }
4353 lck_mtx_unlock(&nmp->nm_lock);
4354 }
4355 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
4356 break;
4357 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
4358 }
4359 lck_mtx_unlock(&req->r_mtx);
4360
4361 if (!error) {
4362 nfs_request_wait(req);
4363 error = nfs_request_finish(req, nmrepp, status);
4364 }
4365
4366 while (!error && (req->r_flags & R_RESTART)) {
4367 if (asyncio && req->r_resendtime) { /* send later */
4368 lck_mtx_lock(&req->r_mtx);
4369 nfs_asyncio_resend(req);
4370 lck_mtx_unlock(&req->r_mtx);
4371 return (EINPROGRESS);
4372 }
4373 req->r_error = 0;
4374 req->r_flags &= ~R_RESTART;
4375 if ((error = nfs_request_add_header(req)))
4376 break;
4377 if ((error = nfs_request_send(req, !asyncio)))
4378 break;
4379 if (asyncio)
4380 return (EINPROGRESS);
4381 nfs_request_wait(req);
4382 if ((error = nfs_request_finish(req, nmrepp, status)))
4383 break;
4384 }
4385 if (xidp)
4386 *xidp = req->r_xid;
4387
4388 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
4389 nfs_request_rele(req);
4390 return (error);
4391 }
4392
4393 /*
4394 * Cancel a pending asynchronous NFS request.
4395 */
4396 void
4397 nfs_request_async_cancel(struct nfsreq *req)
4398 {
4399 nfs_reqdequeue(req);
4400 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
4401 nfs_request_rele(req);
4402 }
4403
4404 /*
4405 * Flag a request as being terminated.
4406 */
4407 void
4408 nfs_softterm(struct nfsreq *req)
4409 {
4410 struct nfsmount *nmp = req->r_nmp;
4411 req->r_flags |= R_SOFTTERM;
4412 req->r_error = ETIMEDOUT;
4413 if (!(req->r_flags & R_CWND) || !nmp)
4414 return;
4415 /* update congestion window */
4416 req->r_flags &= ~R_CWND;
4417 lck_mtx_lock(&nmp->nm_lock);
4418 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
4419 nmp->nm_sent -= NFS_CWNDSCALE;
4420 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
4421 /* congestion window is open, poke the cwnd queue */
4422 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
4423 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
4424 req2->r_cchain.tqe_next = NFSREQNOLIST;
4425 wakeup(req2);
4426 }
4427 lck_mtx_unlock(&nmp->nm_lock);
4428 }
4429
4430 /*
4431 * Ensure req isn't in use by the timer, then dequeue it.
4432 */
4433 void
4434 nfs_reqdequeue(struct nfsreq *req)
4435 {
4436 lck_mtx_lock(nfs_request_mutex);
4437 while (req->r_lflags & RL_BUSY) {
4438 req->r_lflags |= RL_WAITING;
4439 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
4440 }
4441 if (req->r_lflags & RL_QUEUED) {
4442 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
4443 req->r_lflags &= ~RL_QUEUED;
4444 }
4445 lck_mtx_unlock(nfs_request_mutex);
4446 }
4447
4448 /*
4449 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
4450 * free()'d out from under it.
4451 */
4452 void
4453 nfs_reqbusy(struct nfsreq *req)
4454 {
4455 if (req->r_lflags & RL_BUSY)
4456 panic("req locked");
4457 req->r_lflags |= RL_BUSY;
4458 }
4459
4460 /*
4461 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
4462 */
4463 struct nfsreq *
4464 nfs_reqnext(struct nfsreq *req)
4465 {
4466 struct nfsreq * nextreq;
4467
4468 if (req == NULL)
4469 return (NULL);
4470 /*
4471 * We need to get and busy the next req before signalling the
4472 * current one, otherwise wakeup() may block us and we'll race to
4473 * grab the next req.
4474 */
4475 nextreq = TAILQ_NEXT(req, r_chain);
4476 if (nextreq != NULL)
4477 nfs_reqbusy(nextreq);
4478 /* unbusy and signal. */
4479 req->r_lflags &= ~RL_BUSY;
4480 if (req->r_lflags & RL_WAITING) {
4481 req->r_lflags &= ~RL_WAITING;
4482 wakeup(&req->r_lflags);
4483 }
4484 return (nextreq);
4485 }
4486
4487 /*
4488 * NFS request queue timer routine
4489 *
4490 * Scan the NFS request queue for any requests that have timed out.
4491 *
4492 * Alert the system of unresponsive servers.
4493 * Mark expired requests on soft mounts as terminated.
4494 * For UDP, mark/signal requests for retransmission.
4495 */
4496 void
4497 nfs_request_timer(__unused void *param0, __unused void *param1)
4498 {
4499 struct nfsreq *req;
4500 struct nfsmount *nmp;
4501 int timeo, maxtime, finish_asyncio, error;
4502 struct timeval now;
4503 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
4504
4505 lck_mtx_lock(nfs_request_mutex);
4506 req = TAILQ_FIRST(&nfs_reqq);
4507 if (req == NULL) { /* no requests - turn timer off */
4508 nfs_request_timer_on = 0;
4509 lck_mtx_unlock(nfs_request_mutex);
4510 return;
4511 }
4512
4513 nfs_reqbusy(req);
4514 TAILQ_INIT(&nfs_mount_poke_queue);
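/*
 * Sockets that need poking are collected on this local queue and
 * poked only after nfs_request_mutex is dropped at the end of the
 * scan, so no socket calls are made while holding the request
 * queue lock.
 */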
4515
4516 microuptime(&now);
4517 for ( ; req != NULL ; req = nfs_reqnext(req)) {
4518 nmp = req->r_nmp;
4519 if (!nmp) /* unmounted */
4520 continue;
4521 if (req->r_error || req->r_nmrep.nmc_mhead)
4522 continue;
4523 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
4524 if (req->r_callback.rcb_func != NULL) {
4525 /* async I/O RPC needs to be finished */
4526 lck_mtx_lock(&req->r_mtx);
4527 req->r_error = error;
4528 finish_asyncio = !(req->r_flags & R_WAITSENT);
4529 wakeup(req);
4530 lck_mtx_unlock(&req->r_mtx);
4531 if (finish_asyncio)
4532 nfs_asyncio_finish(req);
4533 }
4534 continue;
4535 }
4536
4537 lck_mtx_lock(&req->r_mtx);
4538
4539 if (nmp->nm_tprintf_initial_delay &&
4540 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
4541 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
4542 req->r_lastmsg = now.tv_sec;
4543 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
4544 "not responding");
4545 req->r_flags |= R_TPRINTFMSG;
4546 lck_mtx_lock(&nmp->nm_lock);
4547 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
4548 lck_mtx_unlock(&nmp->nm_lock);
4549 /* we're not yet completely mounted and */
4550 /* we can't complete an RPC, so we fail */
4551 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4552 nfs_softterm(req);
4553 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
4554 wakeup(req);
4555 lck_mtx_unlock(&req->r_mtx);
4556 if (finish_asyncio)
4557 nfs_asyncio_finish(req);
4558 continue;
4559 }
4560 lck_mtx_unlock(&nmp->nm_lock);
4561 }
4562
4563 /*
4564 * Put a reasonable limit on the maximum timeout,
4565 * and reduce that limit when soft mounts get timeouts or are in reconnect.
4566 */
4567 if (!NMFLAG(nmp, SOFT) && !nfs_can_squish(nmp))
4568 maxtime = NFS_MAXTIMEO;
4569 else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
4570 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
4571 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
4572 else
4573 maxtime = NFS_MINTIMEO/4;
4574
4575 /*
4576 * Check for request timeout.
4577 */
4578 if (req->r_rtt >= 0) {
4579 req->r_rtt++;
4580 lck_mtx_lock(&nmp->nm_lock);
4581 if (req->r_flags & R_RESENDERR) {
4582 /* with resend errors, retry every few seconds */
4583 timeo = 4*hz;
4584 } else {
4585 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
4586 timeo = NFS_MINIDEMTIMEO; // gss context setup
4587 else if (NMFLAG(nmp, DUMBTIMER))
4588 timeo = nmp->nm_timeo;
4589 else
4590 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
4591
4592 /* ensure 62.5 ms floor */
4593 while (16 * timeo < hz)
4594 timeo *= 2;
4595 if (nmp->nm_timeouts > 0)
4596 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
4597 }
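/*
 * At this point timeo is either the resend-error retry interval or
 * the base RTO (floored at ~62.5 ms above) scaled by the backoff
 * table entry for nm_timeouts consecutive timeouts; nm_timeouts is
 * capped at 8 below and reset to zero when a reply arrives, so the
 * backoff is bounded.
 */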
4598 /* limit timeout to max */
4599 if (timeo > maxtime)
4600 timeo = maxtime;
4601 if (req->r_rtt <= timeo) {
4602 lck_mtx_unlock(&nmp->nm_lock);
4603 lck_mtx_unlock(&req->r_mtx);
4604 continue;
4605 }
4606 /* The request has timed out */
4607 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
4608 req->r_procnum, proct[req->r_procnum],
4609 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
4610 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
4611 if (nmp->nm_timeouts < 8)
4612 nmp->nm_timeouts++;
4613 nfs_mount_check_dead_timeout(nmp);
4614 /* if it's been a few seconds, try poking the socket */
4615 if ((nmp->nm_sotype == SOCK_STREAM) &&
4616 ((now.tv_sec - req->r_start) >= 3) &&
4617 !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
4618 (nmp->nm_sockflags & NMSOCK_READY)) {
4619 nmp->nm_sockflags |= NMSOCK_POKE;
4620 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
4621 }
4622 lck_mtx_unlock(&nmp->nm_lock);
4623 }
4624
4625 /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
4626 if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
4627 ((req->r_rexmit >= req->r_retry) || /* too many */
4628 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
4629 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4630 lck_mtx_lock(&nmp->nm_lock);
4631 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
4632 lck_mtx_unlock(&nmp->nm_lock);
4633 /* make sure we note the unresponsive server */
4634 /* (maxtime may be less than tprintf delay) */
4635 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
4636 "not responding");
4637 req->r_lastmsg = now.tv_sec;
4638 req->r_flags |= R_TPRINTFMSG;
4639 } else {
4640 lck_mtx_unlock(&nmp->nm_lock);
4641 }
4642 if (req->r_flags & R_NOINTR) {
4643 /* don't terminate nointr requests on timeout */
4644 lck_mtx_unlock(&req->r_mtx);
4645 continue;
4646 }
4647 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
4648 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
4649 now.tv_sec - req->r_start));
4650 nfs_softterm(req);
4651 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
4652 wakeup(req);
4653 lck_mtx_unlock(&req->r_mtx);
4654 if (finish_asyncio)
4655 nfs_asyncio_finish(req);
4656 continue;
4657 }
4658
4659 /* for TCP, only resend if explicitly requested */
4660 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
4661 if (++req->r_rexmit > NFS_MAXREXMIT)
4662 req->r_rexmit = NFS_MAXREXMIT;
4663 req->r_rtt = 0;
4664 lck_mtx_unlock(&req->r_mtx);
4665 continue;
4666 }
4667
4668 /*
4669 * The request needs to be (re)sent. Kick the requester to resend it.
4670 * (unless it's already marked as needing a resend)
4671 */
4672 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
4673 lck_mtx_unlock(&req->r_mtx);
4674 continue;
4675 }
4676 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
4677 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
4678 req->r_flags |= R_MUSTRESEND;
4679 req->r_rtt = -1;
4680 wakeup(req);
4681 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
4682 nfs_asyncio_resend(req);
4683 lck_mtx_unlock(&req->r_mtx);
4684 }
4685
4686 lck_mtx_unlock(nfs_request_mutex);
4687
4688 /* poke any sockets */
4689 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
4690 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
4691 nfs_sock_poke(nmp);
4692 lck_mtx_lock(&nmp->nm_lock);
4693 nmp->nm_sockflags &= ~NMSOCK_POKE;
4694 wakeup(&nmp->nm_sockflags);
4695 lck_mtx_unlock(&nmp->nm_lock);
4696 }
4697
4698 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
4699 }
4700
4701 /*
4702 * check a thread's proc for the "noremotehang" flag.
4703 */
4704 int
4705 nfs_noremotehang(thread_t thd)
4706 {
4707 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
4708 return (p && proc_noremotehang(p));
4709 }
4710
4711 /*
4712 * Test for a termination condition pending on the process.
4713 * This is used to determine if we need to bail on a mount.
4714 * ETIMEDOUT is returned if there has been a soft timeout.
4715 * EINTR is returned if there is a signal pending that is not being ignored
4716 * and the mount is interruptible, or if we are a thread that is in the process
4717 * of cancellation (also SIGKILL posted).
4718 */
4719 extern int sigprop[NSIG+1];
4720 int
4721 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
4722 {
4723 proc_t p;
4724 int error = 0;
4725
4726 if (nmp == NULL)
4727 return (ENXIO);
4728
4729 if (req && (req->r_flags & R_SOFTTERM))
4730 return (ETIMEDOUT); /* request has been terminated. */
4731 if (req && (req->r_flags & R_NOINTR))
4732 thd = NULL; /* don't check for signal on R_NOINTR */
4733
4734 if (!nmplocked)
4735 lck_mtx_lock(&nmp->nm_lock);
4736 if (nmp->nm_state & NFSSTA_FORCE) {
4737 /* If a force unmount is in progress then fail. */
4738 error = EIO;
4739 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
4740 /* Someone is unmounting us, go soft and mark it. */
4741 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
4742 nmp->nm_state |= NFSSTA_FORCE;
4743 }
4744
4745 /* Check if the mount is marked dead. */
4746 if (!error && (nmp->nm_state & NFSSTA_DEAD))
4747 error = ENXIO;
4748
4749 /*
4750 * If the mount is hung and we've requested not to hang
4751 * on remote filesystems, then bail now.
4752 */
4753 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
4754 error = EIO;
4755
4756 if (!nmplocked)
4757 lck_mtx_unlock(&nmp->nm_lock);
4758 if (error)
4759 return (error);
4760
4761 /* may not have a thread for async I/O */
4762 if (thd == NULL)
4763 return (0);
4764
4765 /*
4766 * Check if the process is aborted, but don't interrupt if we
4767 * were killed by a signal and this is the exiting thread which
4768 * is attempting to dump core.
4769 */
4770 if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
4771 (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
4772 (p->p_sigacts == NULL) ||
4773 (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
4774 !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
4775 return (EINTR);
4776
4777 /* mask off thread and process blocked signals. */
4778 if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
4779 proc_pendingsignals(p, NFSINT_SIGMASK))
4780 return (EINTR);
4781 return (0);
4782 }
4783
4784 /*
4785 * Lock a socket against others.
4786 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
4787 * and also to avoid race conditions between the processes with nfs requests
4788 * in progress when a reconnect is necessary.
4789 */
4790 int
4791 nfs_sndlock(struct nfsreq *req)
4792 {
4793 struct nfsmount *nmp = req->r_nmp;
4794 int *statep;
4795 int error = 0, slpflag = 0;
4796 struct timespec ts = { 0, 0 };
4797
4798 if (nmp == NULL)
4799 return (ENXIO);
4800
4801 lck_mtx_lock(&nmp->nm_lock);
4802 statep = &nmp->nm_state;
4803
4804 if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
4805 slpflag = PCATCH;
4806 while (*statep & NFSSTA_SNDLOCK) {
4807 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
4808 break;
4809 *statep |= NFSSTA_WANTSND;
4810 if (nfs_noremotehang(req->r_thread))
4811 ts.tv_sec = 1;
4812 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
4813 if (slpflag == PCATCH) {
4814 slpflag = 0;
4815 ts.tv_sec = 2;
4816 }
4817 }
4818 if (!error)
4819 *statep |= NFSSTA_SNDLOCK;
4820 lck_mtx_unlock(&nmp->nm_lock);
4821 return (error);
4822 }
4823
4824 /*
4825 * Unlock the stream socket for others.
4826 */
4827 void
4828 nfs_sndunlock(struct nfsreq *req)
4829 {
4830 struct nfsmount *nmp = req->r_nmp;
4831 int *statep, wake = 0;
4832
4833 if (nmp == NULL)
4834 return;
4835 lck_mtx_lock(&nmp->nm_lock);
4836 statep = &nmp->nm_state;
4837 if ((*statep & NFSSTA_SNDLOCK) == 0)
4838 panic("nfs sndunlock");
4839 *statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
4840 if (*statep & NFSSTA_WANTSND) {
4841 *statep &= ~NFSSTA_WANTSND;
4842 wake = 1;
4843 }
4844 lck_mtx_unlock(&nmp->nm_lock);
4845 if (wake)
4846 wakeup(statep);
4847 }
4848
4849 int
4850 nfs_aux_request(
4851 struct nfsmount *nmp,
4852 thread_t thd,
4853 struct sockaddr *saddr,
4854 socket_t so,
4855 int sotype,
4856 mbuf_t mreq,
4857 uint32_t xid,
4858 int bindresv,
4859 int timeo,
4860 struct nfsm_chain *nmrep)
4861 {
4862 int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
4863 socket_t newso = NULL;
4864 struct sockaddr_storage ss;
4865 struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
4866 mbuf_t m, mrep = NULL;
4867 struct msghdr msg;
4868 uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
4869 uint32_t verf_type, verf_len, accepted_status;
4870 size_t readlen, sentlen;
4871 struct nfs_rpc_record_state nrrs;
4872
4873 if (!so) {
4874 /* create socket and set options */
4875 soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
4876 if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
4877 goto nfsmout;
4878
4879 if (bindresv) {
4880 int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
4881 int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
4882 int portrange = IP_PORTRANGE_LOW;
4883 error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
4884 nfsmout_if(error);
4885 ss.ss_len = saddr->sa_len;
4886 ss.ss_family = saddr->sa_family;
4887 if (ss.ss_family == AF_INET) {
4888 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
4889 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
4890 } else if (ss.ss_family == AF_INET6) {
4891 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
4892 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
4893 } else {
4894 error = EINVAL;
4895 }
4896 if (!error)
4897 error = sock_bind(newso, (struct sockaddr *)&ss);
4898 nfsmout_if(error);
4899 }
4900
4901 if (sotype == SOCK_STREAM) {
4902 on = 4; /* don't wait too long for the socket to connect */
4903 sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on));
4904 error = sock_connect(newso, saddr, 0);
4905 nfsmout_if(error);
4906 }
4907 if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
4908 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
4909 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
4910 goto nfsmout;
4911 so = newso;
4912 } else {
4913 /* make sure socket is using a one second timeout in this function */
4914 optlen = sizeof(orig_rcvto);
4915 error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
4916 if (!error) {
4917 optlen = sizeof(orig_sndto);
4918 error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
4919 }
4920 if (!error) {
4921 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
4922 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
4923 restoreto = 1;
4924 }
4925 }
4926
4927 if (sotype == SOCK_STREAM) {
4928 sendat = 0; /* we only resend the request for UDP */
4929 nfs_rpc_record_state_init(&nrrs);
4930 }
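/*
 * For UDP the request is (re)sent on a doubling schedule: on passes
 * 0, 4, 8, 16, and 30 of the one-second receive loop below. For TCP,
 * sendat was zeroed above, so the request goes out exactly once.
 */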
4931
4932 for (try=0; try < timeo; try++) {
4933 if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0)))
4934 break;
4935 if (!try || (try == sendat)) {
4936 /* send the request (resending periodically for UDP) */
4937 if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
4938 goto nfsmout;
4939 bzero(&msg, sizeof(msg));
4940 if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
4941 msg.msg_name = saddr;
4942 msg.msg_namelen = saddr->sa_len;
4943 }
4944 if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen)))
4945 goto nfsmout;
4946 sendat *= 2;
4947 if (sendat > 30)
4948 sendat = 30;
4949 }
4950 /* wait for the response */
4951 if (sotype == SOCK_STREAM) {
4952 /* try to read (more of) record */
4953 error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
4954 /* if we don't have the whole record yet, we'll keep trying */
4955 } else {
4956 readlen = 1<<18;
4957 bzero(&msg, sizeof(msg));
4958 error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
4959 }
4960 if (error == EWOULDBLOCK)
4961 continue;
4962 nfsmout_if(error);
4963 /* parse the response */
4964 nfsm_chain_dissect_init(error, nmrep, mrep);
4965 nfsm_chain_get_32(error, nmrep, rxid);
4966 nfsm_chain_get_32(error, nmrep, reply);
4967 nfsmout_if(error);
4968 if ((rxid != xid) || (reply != RPC_REPLY))
4969 error = EBADRPC;
4970 nfsm_chain_get_32(error, nmrep, reply_status);
4971 nfsmout_if(error);
4972 if (reply_status == RPC_MSGDENIED) {
4973 nfsm_chain_get_32(error, nmrep, rejected_status);
4974 nfsmout_if(error);
4975 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
4976 goto nfsmout;
4977 }
4978 nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
4979 nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
4980 nfsmout_if(error);
4981 if (verf_len)
4982 nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
4983 nfsm_chain_get_32(error, nmrep, accepted_status);
4984 nfsmout_if(error);
4985 switch (accepted_status) {
4986 case RPC_SUCCESS:
4987 error = 0;
4988 break;
4989 case RPC_PROGUNAVAIL:
4990 error = EPROGUNAVAIL;
4991 break;
4992 case RPC_PROGMISMATCH:
4993 error = EPROGMISMATCH;
4994 break;
4995 case RPC_PROCUNAVAIL:
4996 error = EPROCUNAVAIL;
4997 break;
4998 case RPC_GARBAGE:
4999 error = EBADRPC;
5000 break;
5001 case RPC_SYSTEM_ERR:
5002 default:
5003 error = EIO;
5004 break;
5005 }
5006 break;
5007 }
5008 nfsmout:
5009 if (restoreto) {
5010 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(orig_rcvto));
5011 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(orig_sndto));
5012 }
5013 if (newso) {
5014 sock_shutdown(newso, SHUT_RDWR);
5015 sock_close(newso);
5016 }
5017 mbuf_freem(mreq);
5018 return (error);
5019 }
5020
5021 int
5022 nfs_portmap_lookup(
5023 struct nfsmount *nmp,
5024 vfs_context_t ctx,
5025 struct sockaddr *sa,
5026 socket_t so,
5027 uint32_t protocol,
5028 uint32_t vers,
5029 uint32_t ipproto,
5030 int timeo)
5031 {
5032 thread_t thd = vfs_context_thread(ctx);
5033 kauth_cred_t cred = vfs_context_ucred(ctx);
5034 struct sockaddr_storage ss;
5035 struct sockaddr *saddr = (struct sockaddr*)&ss;
5036 struct nfsm_chain nmreq, nmrep;
5037 mbuf_t mreq;
5038 int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0;
5039 uint32_t port;
5040 uint64_t xid = 0;
5041 char uaddr[MAX_IPv6_STR_LEN+16];
5042
5043 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5044 if (saddr->sa_family == AF_INET) {
5045 ip = 4;
5046 pmprog = PMAPPROG;
5047 pmvers = PMAPVERS;
5048 pmproc = PMAPPROC_GETPORT;
5049 } else if (saddr->sa_family == AF_INET6) {
5050 ip = 6;
5051 pmprog = RPCBPROG;
5052 pmvers = RPCBVERS4;
5053 pmproc = RPCBPROC_GETVERSADDR;
5054 } else {
5055 return (EINVAL);
5056 }
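/*
 * For IPv4 this speaks the classic portmap protocol
 * (PMAPPROC_GETPORT returns the port number directly). IPv6
 * requires rpcbind: try v4 GETVERSADDR first, falling back to v3
 * GETADDR below. rpcbind returns a "universal address" string --
 * per convention, the address in presentation form followed by
 * ".p1.p2", where the port is p1*256 + p2 -- which
 * nfs_uaddr2sockaddr() converts back into a sockaddr.
 */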
5057 nfsm_chain_null(&nmreq);
5058 nfsm_chain_null(&nmrep);
5059
5060 tryagain:
5061 /* send portmapper request to get port/uaddr */
5062 if (ip == 4)
5063 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT);
5064 else
5065 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT);
5066 nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED);
5067 nfsm_chain_add_32(error, &nmreq, protocol);
5068 nfsm_chain_add_32(error, &nmreq, vers);
5069 if (ip == 4) {
5070 nfsm_chain_add_32(error, &nmreq, ipproto);
5071 nfsm_chain_add_32(error, &nmreq, 0);
5072 } else {
5073 if (ipproto == IPPROTO_TCP)
5074 nfsm_chain_add_string(error, &nmreq, "tcp6", 4);
5075 else
5076 nfsm_chain_add_string(error, &nmreq, "udp6", 4);
5077 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */
5078 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */
5079 }
5080 nfsm_chain_build_done(error, &nmreq);
5081 nfsmout_if(error);
5082 error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
5083 pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead,
5084 &xid, &mreq);
5085 nfsmout_if(error);
5086 nmreq.nmc_mhead = NULL;
5087 error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
5088 mreq, R_XID32(xid), 0, timeo, &nmrep);
5089
5090 /* grab port from portmap response */
5091 if (ip == 4) {
5092 nfsm_chain_get_32(error, &nmrep, port);
5093 if (!error)
5094 ((struct sockaddr_in*)sa)->sin_port = htons(port);
5095 } else {
5096 /* get uaddr string and convert to sockaddr */
5097 nfsm_chain_get_32(error, &nmrep, ualen);
5098 if (!error) {
5099 if (ualen > ((int)sizeof(uaddr)-1))
5100 error = EIO;
5101 if (ualen < 1) {
5102 /* program is not available, just return a zero port */
5103 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5104 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0);
5105 } else {
5106 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr);
5107 if (!error) {
5108 uaddr[ualen] = '\0';
5109 if (!nfs_uaddr2sockaddr(uaddr, saddr))
5110 error = EIO;
5111 }
5112 }
5113 }
5114 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) {
5115 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
5116 if (pmvers == RPCBVERS4) {
5117 /* fall back to v3 and GETADDR */
5118 pmvers = RPCBVERS3;
5119 pmproc = RPCBPROC_GETADDR;
5120 nfsm_chain_cleanup(&nmreq);
5121 nfsm_chain_cleanup(&nmrep);
5122 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5123 xid = 0;
5124 error = 0;
5125 goto tryagain;
5126 }
5127 }
5128 if (!error)
5129 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len));
5130 }
5131 nfsmout:
5132 nfsm_chain_cleanup(&nmreq);
5133 nfsm_chain_cleanup(&nmrep);
5134 return (error);
5135 }
5136
5137 int
5138 nfs_msg(thread_t thd,
5139 const char *server,
5140 const char *msg,
5141 int error)
5142 {
5143 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
5144 tpr_t tpr;
5145
5146 if (p)
5147 tpr = tprintf_open(p);
5148 else
5149 tpr = NULL;
5150 if (error)
5151 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
5152 else
5153 tprintf(tpr, "nfs server %s: %s\n", server, msg);
5154 tprintf_close(tpr);
5155 return (0);
5156 }
5157
5158 #define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */
5159 #define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if they are automounted */
5160 #define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */
5161 #define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */
5162 #define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */
5163
5164 uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK;
5165 int32_t nfs_is_mobile;
5166
5167 #define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out for squishy mounts */
5168 #define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out when nfs_squish_flags NFS_SQUISH_QUICK bit is set*/
5169
5170 /*
5171 * Could this mount be squished?
5172 */
5173 int
5174 nfs_can_squish(struct nfsmount *nmp)
5175 {
5176 uint64_t flags = vfs_flags(nmp->nm_mountp);
5177 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) && NMFLAG(nmp, SOFT));
5178
5179 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0)
5180 return (0);
5181
5182 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0)
5183 return (0);
5184
5185 return (1);
5186 }
5187
5188 /*
5189 * NFS mounts default to "rw,hard" - but frequently on mobile clients
5190 * the mount may become "not responding". It's desirable to be able
5191 * to unmount these dead mounts, but only if there is no risk of
5192 * losing data or crashing applications. A "squishy" NFS mount is one
5193 * that can be force unmounted with little risk of harm.
5194 *
5195 * nfs_is_squishy checks if a mount is in a squishy state. A mount is
5196 * in a squishy state iff it is allowed to be squishy and there are no
5197 * dirty pages, no mmapped files, and no files open for write. Whether
5198 * a mount is allowed to be squishy is controlled by the settings of
5199 * nfs_squishy_flags and by the machine's mobility state. These
5200 * flags can be set by sysctls.
5201 *
5202 * If nfs_is_squishy determines that we are in a squishy state, we
5203 * raise the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT
5204 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set; see above)
5205 * or 1/8th of the mount's nm_deadtimeout value; otherwise we just
5206 * reset the current dead timeout to the mount's nm_deadtimeout
5207 * value set at mount time. (See the worked example below.)
5208 *
5209 * Assumes that nm_lock is held.
5210 *
5211 * Note this routine is racy, but its effect on the dead timeout
5212 * only matters when we're in trouble and are likely to stay that
5213 * way. Since by default it applies only to automounted volumes on
5214 * mobile machines, this is a reasonable trade-off between data
5215 * integrity and user experience. It can be disabled or tuned via
5216 * the nfs.conf file.
5217 */
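
/*
 * Worked example (illustrative): with NFS_SQUISH_QUICK clear the
 * floor is NFS_SQUISHY_DEADTIMEOUT, so nm_deadtimeout = 120 yields a
 * squishy dead timeout of max(120/8, 8) = 15 seconds and
 * nm_deadtimeout = 60 yields max(60/8, 8) = 8.  With NFS_SQUISH_QUICK
 * set (as in the default nfs_squishy_flags) the floor is
 * NFS_SQUISHY_QUICKTIMEOUT, so nm_deadtimeout = 60 yields 7 seconds.
 */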
5218
5219 int
5220 nfs_is_squishy(struct nfsmount *nmp)
5221 {
5222 mount_t mp = nmp->nm_mountp;
5223 int squishy = 0;
5224 int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;
5225
5226 NFS_SOCK_DBG(("nfs_is_squishy: %s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
5227 vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile));
5228
5229 if (!nfs_can_squish(nmp))
5230 goto out;
5231
5232 timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo;
5233 NFS_SOCK_DBG(("nfs_is_squishy: nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo));
5234
5235 if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
5236 uint64_t flags = mp ? vfs_flags(mp) : 0;
5237 squishy = 1;
5238
5239 /*
5240 * Walk the nfs nodes and check for dirty buffers if we're not
5241 * RDONLY and we've not already been declared as squishy since
5242 * this can be a bit expensive.
5243 */
5244 if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY))
5245 squishy = !nfs_mount_is_dirty(mp);
5246 }
5247
5248 out:
5249 if (squishy)
5250 nmp->nm_state |= NFSSTA_SQUISHY;
5251 else
5252 nmp->nm_state &= ~NFSSTA_SQUISHY;
5253
5254 nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;
5255
5256 NFS_SOCK_DBG(("nfs_is_squishy: nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout));
5257
5258 return (squishy);
5259 }
5260
5261 /*
5262 * On a send operation, if we can't reach the server, we have only one server to talk to,
5263 * the NFS_SQUISH_QUICK flag is set, and we are in a squishy state, then mark the mount as
5264 * dead and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise.
5265 */
5266 static int
5267 nfs_is_dead_lock(int error, struct nfsmount *nmp)
5268 {
5269 if (nmp->nm_state & NFSSTA_DEAD)
5270 return (1);
5271
5272 if ((error != ENETUNREACH && error != EHOSTUNREACH) ||
5273 !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1))
5274 return (0);
5275 if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
5276 printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
5277 nmp->nm_state |= NFSSTA_DEAD;
5278 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
5279 return (1);
5280 }
5281 return (0);
5282 }
5283
5284 int
5285 nfs_is_dead(int error, struct nfsmount *nmp)
5286 {
5287 int is_dead;
5288
5289 lck_mtx_lock(&nmp->nm_lock);
5290 is_dead = nfs_is_dead_lock(error, nmp);
5291 lck_mtx_unlock(&nmp->nm_lock);
5292
5293 return (is_dead);
5294 }
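
/*
 * Typical call pattern (an illustrative sketch, not an actual caller
 * in this file): after a failed send, check whether the mount has
 * been squished dead and fail the request immediately if so.
 */
#ifdef notdef
static int
nfs_example_send_fail(int error, struct nfsmount *nmp)
{
	if (nfs_is_dead(error, nmp))
		return (error);	/* mount is dead; don't bother retrying */
	return (0);		/* caller may retry/requeue */
}
#endif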
5295
5296 void
5297 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
5298 {
5299 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5300 uint32_t do_vfs_signal;
5301 struct timeval now;
5302
5303 if (nmp == NULL)
5304 return;
5305
5306 lck_mtx_lock(&nmp->nm_lock);
5307
5308 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5309 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5310 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5311 wasunresponsive = (nmp->nm_state & timeoutmask);
5312
5313 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5314 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5315
5316 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
5317 nmp->nm_state |= NFSSTA_TIMEO;
5318 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
5319 nmp->nm_state |= NFSSTA_LOCKTIMEO;
5320 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5321 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
5322
5323 unresponsive = (nmp->nm_state & timeoutmask);
5324
5325 nfs_is_squishy(nmp);
5326
5327 if (unresponsive && (nmp->nm_curdeadtimeout > 0)) {
5328 microuptime(&now);
5329 if (!wasunresponsive) {
5330 nmp->nm_deadto_start = now.tv_sec;
5331 nfs_mount_sock_thread_wake(nmp);
5332 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout) {
5333 if (!(nmp->nm_state & NFSSTA_DEAD))
5334 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
5335 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
5336 nmp->nm_state |= NFSSTA_DEAD;
5337 }
5338 }
5339 lck_mtx_unlock(&nmp->nm_lock);
5340
5341 if (nmp->nm_state & NFSSTA_DEAD)
5342 do_vfs_signal = VQ_DEAD;
5343 else if (softnobrowse || wasunresponsive || !unresponsive)
5344 do_vfs_signal = 0;
5345 else
5346 do_vfs_signal = VQ_NOTRESP;
5347 if (do_vfs_signal)
5348 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);
5349
5350 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
5351 }
5352
5353 void
5354 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
5355 {
5356 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5357 int do_vfs_signal;
5358
5359 if (nmp == NULL)
5360 return;
5361
5362 if (msg)
5363 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
5364
5365 lck_mtx_lock(&nmp->nm_lock);
5366
5367 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5368 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5369 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5370 wasunresponsive = (nmp->nm_state & timeoutmask);
5371
5372 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5373 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5374
5375 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
5376 nmp->nm_state &= ~NFSSTA_TIMEO;
5377 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
5378 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5379 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5380 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
5381
5382 unresponsive = (nmp->nm_state & timeoutmask);
5383
5384 nmp->nm_deadto_start = 0;
5385 nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
5386 nmp->nm_state &= ~NFSSTA_SQUISHY;
5387 lck_mtx_unlock(&nmp->nm_lock);
5388
5389 if (softnobrowse)
5390 do_vfs_signal = 0;
5391 else
5392 do_vfs_signal = (wasunresponsive && !unresponsive);
5393 if (do_vfs_signal)
5394 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
5395 }
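
/*
 * Taken together, nfs_down() and nfs_up() generate at most one
 * VQ_NOTRESP event per unresponsive episode: nfs_down() signals when
 * the first timeout flag gets set, and nfs_up() signals again (with
 * the event-cleared argument) once the last timeout flag clears.
 */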
5396
5397
5398 #endif /* NFSCLIENT */
5399
5400 #if NFSSERVER
5401
5402 /*
5403 * Generate the rpc reply header
5404 * The siz argument is used to decide whether adding a cluster is worthwhile
5405 */
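/*
 * For reference (standard ONC RPC, RFC 1057), the reply built here is:
 *   accepted:  xid, RPC_REPLY, RPC_MSGACCEPTED, verifier (flavor, len),
 *              accept status (RPC_SUCCESS, RPC_PROGMISMATCH, ...)
 *   denied:    xid, RPC_REPLY, RPC_MSGDENIED, then either RPC_AUTHERR
 *              plus the auth error code, or RPC_MISMATCH plus the lowest
 *              and highest RPC versions supported.
 */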
5406 int
5407 nfsrv_rephead(
5408 struct nfsrv_descript *nd,
5409 __unused struct nfsrv_sock *slp,
5410 struct nfsm_chain *nmrepp,
5411 size_t siz)
5412 {
5413 mbuf_t mrep;
5414 u_int32_t *tl;
5415 struct nfsm_chain nmrep;
5416 int err, error;
5417
5418 err = nd->nd_repstat;
5419 if (err && (nd->nd_vers == NFS_VER2))
5420 siz = 0;
5421
5422 /*
5423 * If this is a big reply, use a cluster; otherwise
5424 * try to leave leading space for the lower level headers.
5425 */
5426 siz += RPC_REPLYSIZ;
5427 if (siz >= nfs_mbuf_minclsize) {
5428 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
5429 } else {
5430 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
5431 }
5432 if (error) {
5433 /* unable to allocate packet */
5434 /* XXX should we keep statistics for these errors? */
5435 return (error);
5436 }
5437 if (siz < nfs_mbuf_minclsize) {
5438 /* leave space for lower level headers */
5439 tl = mbuf_data(mrep);
5440 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
5441 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
5442 }
5443 nfsm_chain_init(&nmrep, mrep);
5444 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
5445 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
5446 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
5447 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
5448 if (err & NFSERR_AUTHERR) {
5449 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
5450 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
5451 } else {
5452 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
5453 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
5454 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
5455 }
5456 } else {
5457 /* reply status */
5458 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
5459 if (nd->nd_gss_context != NULL) {
5460 /* RPCSEC_GSS verifier */
5461 error = nfs_gss_svc_verf_put(nd, &nmrep);
5462 if (error) {
5463 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
5464 goto done;
5465 }
5466 } else {
5467 /* RPCAUTH_NULL verifier */
5468 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
5469 nfsm_chain_add_32(error, &nmrep, 0);
5470 }
5471 /* accepted status */
5472 switch (err) {
5473 case EPROGUNAVAIL:
5474 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
5475 break;
5476 case EPROGMISMATCH:
5477 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
5478 /* XXX hard coded versions? */
5479 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
5480 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
5481 break;
5482 case EPROCUNAVAIL:
5483 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
5484 break;
5485 case EBADRPC:
5486 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
5487 break;
5488 default:
5489 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
5490 if (nd->nd_gss_context != NULL)
5491 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
5492 if (err != NFSERR_RETVOID)
5493 nfsm_chain_add_32(error, &nmrep,
5494 (err ? nfsrv_errmap(nd, err) : 0));
5495 break;
5496 }
5497 }
5498
5499 done:
5500 nfsm_chain_build_done(error, &nmrep);
5501 if (error) {
5502 /* error composing reply header */
5503 /* XXX should we keep statistics for these errors? */
5504 mbuf_freem(mrep);
5505 return (error);
5506 }
5507
5508 *nmrepp = nmrep;
5509 if ((err != 0) && (err != NFSERR_RETVOID))
5510 OSAddAtomic64(1, &nfsstats.srvrpc_errs);
5511 return (0);
5512 }
5513
5514 /*
5515 * The nfs server send routine.
5516 *
5517 * - return EINTR or ERESTART if interrupted by a signal
5518 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
5519 * - do any cleanup required by recoverable socket errors (???)
5520 */
5521 int
5522 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
5523 {
5524 int error;
5525 socket_t so = slp->ns_so;
5526 struct sockaddr *sendnam;
5527 struct msghdr msg;
5528
5529 bzero(&msg, sizeof(msg));
5530 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
5531 if ((sendnam = mbuf_data(nam))) {
5532 msg.msg_name = (caddr_t)sendnam;
5533 msg.msg_namelen = sendnam->sa_len;
5534 }
5535 }
5536 error = sock_sendmbuf(so, &msg, top, 0, NULL);
5537 if (!error)
5538 return (0);
5539 log(LOG_INFO, "nfsd send error %d\n", error);
5540
5541 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
5542 error = EPIPE; /* zap TCP sockets if they time out on send */
5543
5544 /* Handle any recoverable (soft) socket errors here. (???) */
5545 if (error != EINTR && error != ERESTART && error != EIO &&
5546 error != EWOULDBLOCK && error != EPIPE)
5547 error = 0;
5548
5549 return (error);
5550 }
5551
5552 /*
5553 * Socket upcall routine for the nfsd sockets.
5554 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
5555 * Essentially do as much as possible non-blocking, else punt and it will
5556 * be called with MBUF_WAITOK from an nfsd.
5557 */
5558 void
5559 nfsrv_rcv(socket_t so, void *arg, int waitflag)
5560 {
5561 struct nfsrv_sock *slp = arg;
5562
5563 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
5564 return;
5565
5566 lck_rw_lock_exclusive(&slp->ns_rwlock);
5567 nfsrv_rcv_locked(so, slp, waitflag);
5568 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
5569 }
5570 void
5571 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
5572 {
5573 mbuf_t m, mp, mhck, m2;
5574 int ns_flag=0, error;
5575 struct msghdr msg;
5576 size_t bytes_read;
5577
5578 if ((slp->ns_flag & SLP_VALID) == 0) {
5579 if (waitflag == MBUF_DONTWAIT)
5580 lck_rw_done(&slp->ns_rwlock);
5581 return;
5582 }
5583
5584 #ifdef notdef
5585 /*
5586 * Define this to test for nfsds handling this under heavy load.
5587 */
5588 if (waitflag == MBUF_DONTWAIT) {
5589 ns_flag = SLP_NEEDQ;
5590 goto dorecs;
5591 }
5592 #endif
5593 if (slp->ns_sotype == SOCK_STREAM) {
5594 /*
5595 * If there are already records on the queue, defer soreceive()
5596 * to an(other) nfsd so that there is feedback to the TCP layer that
5597 * the nfs servers are heavily loaded.
5598 */
5599 if (slp->ns_rec) {
5600 ns_flag = SLP_NEEDQ;
5601 goto dorecs;
5602 }
5603
5604 /*
5605 * Do soreceive().
5606 */
5607 bytes_read = 1000000000;
5608 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
5609 if (error || mp == NULL) {
5610 if (error == EWOULDBLOCK)
5611 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
5612 else
5613 ns_flag = SLP_DISCONN;
5614 goto dorecs;
5615 }
5616 m = mp;
5617 if (slp->ns_rawend) {
5618 if ((error = mbuf_setnext(slp->ns_rawend, m)))
5619 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
5620 slp->ns_cc += bytes_read;
5621 } else {
5622 slp->ns_raw = m;
5623 slp->ns_cc = bytes_read;
5624 }
5625 while ((m2 = mbuf_next(m)))
5626 m = m2;
5627 slp->ns_rawend = m;
5628
5629 /*
5630 * Now try and parse record(s) out of the raw stream data.
5631 */
5632 error = nfsrv_getstream(slp, waitflag);
5633 if (error) {
5634 if (error == EPERM)
5635 ns_flag = SLP_DISCONN;
5636 else
5637 ns_flag = SLP_NEEDQ;
5638 }
5639 } else {
5640 struct sockaddr_storage nam;
5641
5642 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
5643 /* already have max # RPC records queued on this socket */
5644 ns_flag = SLP_NEEDQ;
5645 goto dorecs;
5646 }
5647
5648 bzero(&msg, sizeof(msg));
5649 msg.msg_name = (caddr_t)&nam;
5650 msg.msg_namelen = sizeof(nam);
5651
5652 do {
5653 bytes_read = 1000000000;
5654 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
5655 if (mp) {
5656 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
5657 mbuf_setlen(mhck, nam.ss_len);
5658 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
5659 m = mhck;
5660 if (mbuf_setnext(m, mp)) {
5661 /* trouble... just drop it */
5662 printf("nfsrv_rcv: mbuf_setnext failed\n");
5663 mbuf_free(mhck);
5664 m = mp;
5665 }
5666 } else {
5667 m = mp;
5668 }
5669 if (slp->ns_recend)
5670 mbuf_setnextpkt(slp->ns_recend, m);
5671 else {
5672 slp->ns_rec = m;
5673 slp->ns_flag |= SLP_DOREC;
5674 }
5675 slp->ns_recend = m;
5676 mbuf_setnextpkt(m, NULL);
5677 slp->ns_reccnt++;
5678 }
5679 } while (mp);
5680 }
5681
5682 /*
5683 * Now try and process the request records, non-blocking.
5684 */
5685 dorecs:
5686 if (ns_flag)
5687 slp->ns_flag |= ns_flag;
5688 if (waitflag == MBUF_DONTWAIT) {
5689 int wake = (slp->ns_flag & SLP_WORKTODO);
5690 lck_rw_done(&slp->ns_rwlock);
5691 if (wake && nfsd_thread_count) {
5692 lck_mtx_lock(nfsd_mutex);
5693 nfsrv_wakenfsd(slp);
5694 lck_mtx_unlock(nfsd_mutex);
5695 }
5696 }
5697 }
5698
5699 /*
5700 * Try and extract an RPC request from the mbuf data list received on a
5701 * stream socket. The "waitflag" argument indicates whether or not it
5702 * can sleep.
5703 */
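/*
 * Stream records are delimited by 4-byte record marks (RFC 1057
 * record marking): the high bit set means "last fragment" and the
 * low 31 bits give the fragment length.  For example, a mark of
 * 0x800000c8 introduces a final fragment of 200 bytes.
 */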
5704 int
5705 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
5706 {
5707 mbuf_t m;
5708 char *cp1, *cp2, *mdata;
5709 int len, mlen, error;
5710 mbuf_t om, m2, recm;
5711 u_int32_t recmark;
5712
5713 if (slp->ns_flag & SLP_GETSTREAM)
5714 panic("nfs getstream");
5715 slp->ns_flag |= SLP_GETSTREAM;
5716 for (;;) {
5717 if (slp->ns_reclen == 0) {
5718 if (slp->ns_cc < NFSX_UNSIGNED) {
5719 slp->ns_flag &= ~SLP_GETSTREAM;
5720 return (0);
5721 }
5722 m = slp->ns_raw;
5723 mdata = mbuf_data(m);
5724 mlen = mbuf_len(m);
5725 if (mlen >= NFSX_UNSIGNED) {
5726 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
5727 mdata += NFSX_UNSIGNED;
5728 mlen -= NFSX_UNSIGNED;
5729 mbuf_setdata(m, mdata, mlen);
5730 } else {
5731 cp1 = (caddr_t)&recmark;
5732 cp2 = mdata;
5733 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
5734 while (mlen == 0) {
5735 m = mbuf_next(m);
5736 cp2 = mbuf_data(m);
5737 mlen = mbuf_len(m);
5738 }
5739 *cp1++ = *cp2++;
5740 mlen--;
5741 mbuf_setdata(m, cp2, mlen);
5742 }
5743 }
5744 slp->ns_cc -= NFSX_UNSIGNED;
5745 recmark = ntohl(recmark);
5746 slp->ns_reclen = recmark & ~0x80000000;
5747 if (recmark & 0x80000000)
5748 slp->ns_flag |= SLP_LASTFRAG;
5749 else
5750 slp->ns_flag &= ~SLP_LASTFRAG;
5751 if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
5752 slp->ns_flag &= ~SLP_GETSTREAM;
5753 return (EPERM);
5754 }
5755 }
5756
5757 /*
5758 * Now get the record part.
5759 *
5760 * Note that slp->ns_reclen may be 0. Linux sometimes
5761 * generates 0-length RPCs
5762 */
5763 recm = NULL;
5764 if (slp->ns_cc == slp->ns_reclen) {
5765 recm = slp->ns_raw;
5766 slp->ns_raw = slp->ns_rawend = NULL;
5767 slp->ns_cc = slp->ns_reclen = 0;
5768 } else if (slp->ns_cc > slp->ns_reclen) {
5769 len = 0;
5770 m = slp->ns_raw;
5771 mlen = mbuf_len(m);
5772 mdata = mbuf_data(m);
5773 om = NULL;
5774 while (len < slp->ns_reclen) {
5775 if ((len + mlen) > slp->ns_reclen) {
5776 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
5777 slp->ns_flag &= ~SLP_GETSTREAM;
5778 return (EWOULDBLOCK);
5779 }
5780 if (om) {
5781 if (mbuf_setnext(om, m2)) {
5782 /* trouble... just drop it */
5783 printf("nfsrv_getstream: mbuf_setnext failed\n");
5784 mbuf_freem(m2);
5785 slp->ns_flag &= ~SLP_GETSTREAM;
5786 return (EWOULDBLOCK);
5787 }
5788 recm = slp->ns_raw;
5789 } else {
5790 recm = m2;
5791 }
5792 mdata += slp->ns_reclen - len;
5793 mlen -= slp->ns_reclen - len;
5794 mbuf_setdata(m, mdata, mlen);
5795 len = slp->ns_reclen;
5796 } else if ((len + mlen) == slp->ns_reclen) {
5797 om = m;
5798 len += mlen;
5799 m = mbuf_next(m);
5800 recm = slp->ns_raw;
5801 if (mbuf_setnext(om, NULL)) {
5802 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
5803 slp->ns_flag &= ~SLP_GETSTREAM;
5804 return (EWOULDBLOCK);
5805 }
5806 mlen = mbuf_len(m);
5807 mdata = mbuf_data(m);
5808 } else {
5809 om = m;
5810 len += mlen;
5811 m = mbuf_next(m);
5812 mlen = mbuf_len(m);
5813 mdata = mbuf_data(m);
5814 }
5815 }
5816 slp->ns_raw = m;
5817 slp->ns_cc -= len;
5818 slp->ns_reclen = 0;
5819 } else {
5820 slp->ns_flag &= ~SLP_GETSTREAM;
5821 return (0);
5822 }
5823
5824 /*
5825 * Accumulate the fragments into a record.
5826 */
5827 if (slp->ns_frag == NULL) {
5828 slp->ns_frag = recm;
5829 } else {
5830 m = slp->ns_frag;
5831 while ((m2 = mbuf_next(m)))
5832 m = m2;
5833 if ((error = mbuf_setnext(m, recm)))
5834 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
5835 }
5836 if (slp->ns_flag & SLP_LASTFRAG) {
5837 if (slp->ns_recend)
5838 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
5839 else {
5840 slp->ns_rec = slp->ns_frag;
5841 slp->ns_flag |= SLP_DOREC;
5842 }
5843 slp->ns_recend = slp->ns_frag;
5844 slp->ns_frag = NULL;
5845 }
5846 }
5847 }
5848
5849 /*
5850 * Dequeue the next request record from a socket and parse its RPC header.
5851 */
5852 int
5853 nfsrv_dorec(
5854 struct nfsrv_sock *slp,
5855 struct nfsd *nfsd,
5856 struct nfsrv_descript **ndp)
5857 {
5858 mbuf_t m;
5859 mbuf_t nam;
5860 struct nfsrv_descript *nd;
5861 int error = 0;
5862
5863 *ndp = NULL;
5864 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
5865 return (ENOBUFS);
5866 MALLOC_ZONE(nd, struct nfsrv_descript *,
5867 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
5868 if (!nd)
5869 return (ENOMEM);
5870 m = slp->ns_rec;
5871 slp->ns_rec = mbuf_nextpkt(m);
5872 if (slp->ns_rec)
5873 mbuf_setnextpkt(m, NULL);
5874 else {
5875 slp->ns_flag &= ~SLP_DOREC;
5876 slp->ns_recend = NULL;
5877 }
5878 slp->ns_reccnt--;
5879 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
5880 nam = m;
5881 m = mbuf_next(m);
5882 if ((error = mbuf_setnext(nam, NULL)))
5883 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
5884 } else
5885 nam = NULL;
5886 nd->nd_nam2 = nam;
5887 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
5888 if (!error)
5889 error = nfsrv_getreq(nd);
5890 if (error) {
5891 if (nam)
5892 mbuf_freem(nam);
5893 if (nd->nd_gss_context)
5894 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
5895 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
5896 return (error);
5897 }
5898 nd->nd_mrep = NULL;
5899 *ndp = nd;
5900 nfsd->nfsd_nd = nd;
5901 return (0);
5902 }
5903
5904 /*
5905 * Parse an RPC request
5906 * - verify it
5907 * - fill in the cred struct.
5908 */
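/*
 * For reference, an RPCAUTH_SYS (AUTH_UNIX, RFC 1057) credential on
 * the wire is: stamp, machine name (opaque), uid, gid, and up to
 * RPCAUTH_UNIXGIDS extra gids, followed by a verifier that is
 * expected to be AUTH_NONE.  The RPCAUTH_SYS branch below walks
 * exactly this layout.
 */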
5909 int
5910 nfsrv_getreq(struct nfsrv_descript *nd)
5911 {
5912 struct nfsm_chain *nmreq;
5913 int len, i;
5914 u_int32_t nfsvers, auth_type;
5915 int error = 0;
5916 uid_t user_id;
5917 gid_t group_id;
5918 int ngroups;
5919 uint32_t val;
5920
5921 nd->nd_cr = NULL;
5922 nd->nd_gss_context = NULL;
5923 nd->nd_gss_seqnum = 0;
5924 nd->nd_gss_mb = NULL;
5925
5926 user_id = group_id = -2;
5927 val = auth_type = len = 0;
5928
5929 nmreq = &nd->nd_nmreq;
5930 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
5931 nfsm_chain_get_32(error, nmreq, val); // RPC Call
5932 if (!error && (val != RPC_CALL))
5933 error = EBADRPC;
5934 nfsmout_if(error);
5935 nd->nd_repstat = 0;
5936 nfsm_chain_get_32(error, nmreq, val); // RPC Version
5937 nfsmout_if(error);
5938 if (val != RPC_VER2) {
5939 nd->nd_repstat = ERPCMISMATCH;
5940 nd->nd_procnum = NFSPROC_NOOP;
5941 return (0);
5942 }
5943 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
5944 nfsmout_if(error);
5945 if (val != NFS_PROG) {
5946 nd->nd_repstat = EPROGUNAVAIL;
5947 nd->nd_procnum = NFSPROC_NOOP;
5948 return (0);
5949 }
5950 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
5951 nfsmout_if(error);
5952 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
5953 nd->nd_repstat = EPROGMISMATCH;
5954 nd->nd_procnum = NFSPROC_NOOP;
5955 return (0);
5956 }
5957 nd->nd_vers = nfsvers;
5958 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
5959 nfsmout_if(error);
5960 if ((nd->nd_procnum >= NFS_NPROCS) ||
5961 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
5962 nd->nd_repstat = EPROCUNAVAIL;
5963 nd->nd_procnum = NFSPROC_NOOP;
5964 return (0);
5965 }
5966 if (nfsvers != NFS_VER3)
5967 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
5968 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
5969 nfsm_chain_get_32(error, nmreq, len); // Auth Length
5970 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
5971 error = EBADRPC;
5972 nfsmout_if(error);
5973
5974 /* Handle authentication */
5975 if (auth_type == RPCAUTH_SYS) {
5976 struct posix_cred temp_pcred;
5977 if (nd->nd_procnum == NFSPROC_NULL)
5978 return (0);
5979 nd->nd_sec = RPCAUTH_SYS;
5980 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
5981 nfsm_chain_get_32(error, nmreq, len); // hostname length
5982 if (len < 0 || len > NFS_MAXNAMLEN)
5983 error = EBADRPC;
5984 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
5985 nfsmout_if(error);
5986
5987 /* create a temporary credential using the bits from the wire */
5988 bzero(&temp_pcred, sizeof(temp_pcred));
5989 nfsm_chain_get_32(error, nmreq, user_id);
5990 nfsm_chain_get_32(error, nmreq, group_id);
5991 temp_pcred.cr_groups[0] = group_id;
5992 nfsm_chain_get_32(error, nmreq, len); // extra GID count
5993 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
5994 error = EBADRPC;
5995 nfsmout_if(error);
5996 for (i = 1; i <= len; i++)
5997 if (i < NGROUPS)
5998 nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
5999 else
6000 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
6001 nfsmout_if(error);
6002 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
6003 if (ngroups > 1)
6004 nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
6005 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
6006 nfsm_chain_get_32(error, nmreq, len); // verifier length
6007 if (len < 0 || len > RPCAUTH_MAXSIZ)
6008 error = EBADRPC;
6009 if (len > 0)
6010 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
6011
6012 /* request creation of a real credential */
6013 temp_pcred.cr_uid = user_id;
6014 temp_pcred.cr_ngroups = ngroups;
6015 nd->nd_cr = posix_cred_create(&temp_pcred);
6016 if (nd->nd_cr == NULL) {
6017 nd->nd_repstat = ENOMEM;
6018 nd->nd_procnum = NFSPROC_NOOP;
6019 return (0);
6020 }
6021 } else if (auth_type == RPCSEC_GSS) {
6022 error = nfs_gss_svc_cred_get(nd, nmreq);
6023 if (error) {
6024 if (error == EINVAL)
6025 goto nfsmout; // drop the request
6026 nd->nd_repstat = error;
6027 nd->nd_procnum = NFSPROC_NOOP;
6028 return (0);
6029 }
6030 } else {
6031 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
6032 return (0);
6033 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
6034 nd->nd_procnum = NFSPROC_NOOP;
6035 return (0);
6036 }
6037 return (0);
6038 nfsmout:
6039 if (IS_VALID_CRED(nd->nd_cr))
6040 kauth_cred_unref(&nd->nd_cr);
6041 nfsm_chain_cleanup(nmreq);
6042 return (error);
6043 }
6044
6045 /*
6046 * Search for a sleeping nfsd and wake it up.
6047 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
6048 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
6049 * Note: Must be called with nfsd_mutex held.
6050 */
6051 void
6052 nfsrv_wakenfsd(struct nfsrv_sock *slp)
6053 {
6054 struct nfsd *nd;
6055
6056 if ((slp->ns_flag & SLP_VALID) == 0)
6057 return;
6058
6059 lck_rw_lock_exclusive(&slp->ns_rwlock);
6060 /* if there's work to do on this socket, make sure it's queued up */
6061 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
6062 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
6063 slp->ns_flag |= SLP_WAITQ;
6064 }
6065 lck_rw_done(&slp->ns_rwlock);
6066
6067 /* wake up a waiting nfsd, if possible */
6068 nd = TAILQ_FIRST(&nfsd_queue);
6069 if (!nd)
6070 return;
6071
6072 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
6073 nd->nfsd_flag &= ~NFSD_WAITING;
6074 wakeup(nd);
6075 }
6076
6077 #endif /* NFSSERVER */