]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_socket.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_socket.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1991, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66 */
67
68 #include <nfs/nfs_conf.h>
69 #if CONFIG_NFS
70
71 /*
72 * Socket operations for use by nfs
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/signalvar.h>
79 #include <sys/kauth.h>
80 #include <sys/mount_internal.h>
81 #include <sys/kernel.h>
82 #include <sys/kpi_mbuf.h>
83 #include <sys/malloc.h>
84 #include <sys/vnode.h>
85 #include <sys/domain.h>
86 #include <sys/protosw.h>
87 #include <sys/socket.h>
88 #include <sys/un.h>
89 #include <sys/syslog.h>
90 #include <sys/tprintf.h>
91 #include <libkern/OSAtomic.h>
92
93 #include <sys/time.h>
94 #include <kern/clock.h>
95 #include <kern/task.h>
96 #include <kern/thread.h>
97 #include <kern/thread_call.h>
98 #include <sys/user.h>
99 #include <sys/acct.h>
100
101 #include <netinet/in.h>
102 #include <netinet/tcp.h>
103
104 #include <nfs/rpcv2.h>
105 #include <nfs/krpc.h>
106 #include <nfs/nfsproto.h>
107 #include <nfs/nfs.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/nfsm_subs.h>
110 #include <nfs/nfs_gss.h>
111 #include <nfs/nfsmount.h>
112 #include <nfs/nfsnode.h>
113
/* Debug logging for the socket facility (NFS_FAC_SOCK) at level 7. */
#define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)

/*
 * Dump an mbuf chain via nfs_dump_mbuf() when NFS_IS_DBG(NFS_FAC_SOCK, 15)
 * is true.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare "if" expansion would capture the "else" of an
 * enclosing if/else written without braces.
 */
#define NFS_SOCK_DUMP_MBUF(msg, mb) \
	do { \
		if (NFS_IS_DBG(NFS_FAC_SOCK, 15)) { \
			nfs_dump_mbuf(__func__, __LINE__, (msg), (mb)); \
		} \
	} while (0)
116
117 /* XXX */
118 boolean_t current_thread_aborted(void);
119 kern_return_t thread_terminate(thread_t);
120
121
122 #if CONFIG_NFS_SERVER
123 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
124
125 int nfsrv_getstream(struct nfsrv_sock *, int);
126 int nfsrv_getreq(struct nfsrv_descript *);
127 extern int nfsv3_procid[NFS_NPROCS];
128 #endif /* CONFIG_NFS_SERVER */
129
130 /*
131 * compare two sockaddr structures
132 */
133 int
134 nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
135 {
136 if (!sa1) {
137 return -1;
138 }
139 if (!sa2) {
140 return 1;
141 }
142 if (sa1->sa_family != sa2->sa_family) {
143 return (sa1->sa_family < sa2->sa_family) ? -1 : 1;
144 }
145 if (sa1->sa_len != sa2->sa_len) {
146 return (sa1->sa_len < sa2->sa_len) ? -1 : 1;
147 }
148 if (sa1->sa_family == AF_INET) {
149 return bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
150 &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr));
151 }
152 if (sa1->sa_family == AF_INET6) {
153 return bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
154 &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr));
155 }
156 return -1;
157 }
158
159 #if CONFIG_NFS_CLIENT
160
161 int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
162 int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
163 int nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *);
164 void nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *);
165 void nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
166 int nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
167 int nfs_reconnect(struct nfsmount *);
168 int nfs_connect_setup(struct nfsmount *);
169 void nfs_mount_sock_thread(void *, wait_result_t);
170 void nfs_udp_rcv(socket_t, void*, int);
171 void nfs_tcp_rcv(socket_t, void*, int);
172 void nfs_sock_poke(struct nfsmount *);
173 void nfs_request_match_reply(struct nfsmount *, mbuf_t);
174 void nfs_reqdequeue(struct nfsreq *);
175 void nfs_reqbusy(struct nfsreq *);
176 struct nfsreq *nfs_reqnext(struct nfsreq *);
177 int nfs_wait_reply(struct nfsreq *);
178 void nfs_softterm(struct nfsreq *);
179 int nfs_can_squish(struct nfsmount *);
180 int nfs_is_squishy(struct nfsmount *);
181 int nfs_is_dead(int, struct nfsmount *);
182
/*
 * Estimate the retransmit timeout (rto) for an nfs rpc sent via an
 * unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 *
 * NFS_RTO(n, t): n is a pointer to a structure with nm_timeo, nm_srtt[] and
 * nm_sdrtt[] members; t is the timer class (see proct[] below).  Both
 * arguments are evaluated more than once, so they must be free of side
 * effects.  Every use of t is parenthesized so that an expression argument
 * (e.g. "a ? b : c") indexes the arrays correctly.
 *
 * NFS_SRTT(r) / NFS_SDRTT(r): the smoothed rtt / rtt deviation slot for
 * request r.  They index with proct[procnum] - 1, so they are only valid
 * for procedures whose proct[] entry is non-zero.
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	((t) < 3 ? \
	(((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
#define NFS_SRTT(r)     ((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define NFS_SDRTT(r)    ((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
202
203 /*
204 * Defines which timer to use for the procnum.
205 * 0 - default
206 * 1 - getattr
207 * 2 - lookup
208 * 3 - read
209 * 4 - write
210 */
211 static const int proct[] = {
212 [NFSPROC_NULL] = 0,
213 [NFSPROC_GETATTR] = 1,
214 [NFSPROC_SETATTR] = 0,
215 [NFSPROC_LOOKUP] = 2,
216 [NFSPROC_ACCESS] = 1,
217 [NFSPROC_READLINK] = 3,
218 [NFSPROC_READ] = 3,
219 [NFSPROC_WRITE] = 4,
220 [NFSPROC_CREATE] = 0,
221 [NFSPROC_MKDIR] = 0,
222 [NFSPROC_SYMLINK] = 0,
223 [NFSPROC_MKNOD] = 0,
224 [NFSPROC_REMOVE] = 0,
225 [NFSPROC_RMDIR] = 0,
226 [NFSPROC_RENAME] = 0,
227 [NFSPROC_LINK] = 0,
228 [NFSPROC_READDIR] = 3,
229 [NFSPROC_READDIRPLUS] = 3,
230 [NFSPROC_FSSTAT] = 0,
231 [NFSPROC_FSINFO] = 0,
232 [NFSPROC_PATHCONF] = 0,
233 [NFSPROC_COMMIT] = 0,
234 [NFSPROC_NOOP] = 0,
235 };
236
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define NFS_CWNDSCALE   256
#define NFS_MAXCWND     (NFS_CWNDSCALE * 32)

/*
 * Retransmit backoff multipliers (successive powers of two).
 * This is a read-only lookup table, so it is declared const like proct[].
 */
static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
253
254 /*
255 * Increment location index to next address/server/location.
256 */
257 void
258 nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
259 {
260 uint8_t loc = nlip->nli_loc;
261 uint8_t serv = nlip->nli_serv;
262 uint8_t addr = nlip->nli_addr;
263
264 /* move to next address */
265 addr++;
266 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
267 /* no more addresses on current server, go to first address of next server */
268 next_server:
269 addr = 0;
270 serv++;
271 if (serv >= nlp->nl_locations[loc]->nl_servcount) {
272 /* no more servers on current location, go to first server of next location */
273 serv = 0;
274 loc++;
275 if (loc >= nlp->nl_numlocs) {
276 loc = 0; /* after last location, wrap back around to first location */
277 }
278 }
279 }
280 /*
281 * It's possible for this next server to not have any addresses.
282 * Check for that here and go to the next server.
283 * But bail out if we've managed to come back around to the original
284 * location that was passed in. (That would mean no servers had any
285 * addresses. And we don't want to spin here forever.)
286 */
287 if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr)) {
288 return;
289 }
290 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
291 goto next_server;
292 }
293
294 nlip->nli_loc = loc;
295 nlip->nli_serv = serv;
296 nlip->nli_addr = addr;
297 }
298
299 /*
300 * Compare two location indices.
301 */
302 int
303 nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
304 {
305 if (nlip1->nli_loc != nlip2->nli_loc) {
306 return nlip1->nli_loc - nlip2->nli_loc;
307 }
308 if (nlip1->nli_serv != nlip2->nli_serv) {
309 return nlip1->nli_serv - nlip2->nli_serv;
310 }
311 return nlip1->nli_addr - nlip2->nli_addr;
312 }
313
314 /*
315 * Get the mntfromname (or path portion only) for a given location.
316 */
317 void
318 nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
319 {
320 struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
321 char *p;
322 int cnt, i;
323
324 p = s;
325 if (!pathonly) {
326 char *name = fsl->nl_servers[idx.nli_serv]->ns_name;
327 if (name == NULL) {
328 name = "";
329 }
330 if (*name == '\0') {
331 if (*fsl->nl_servers[idx.nli_serv]->ns_addresses[idx.nli_addr]) {
332 name = fsl->nl_servers[idx.nli_serv]->ns_addresses[idx.nli_addr];
333 }
334 cnt = scnprintf(p, size, "<%s>:", name);
335 } else {
336 cnt = scnprintf(p, size, "%s:", name);
337 }
338 p += cnt;
339 size -= cnt;
340 }
341 if (fsl->nl_path.np_compcount == 0) {
342 /* mounting root export on server */
343 if (size > 0) {
344 *p++ = '/';
345 *p++ = '\0';
346 }
347 return;
348 }
349 /* append each server path component */
350 for (i = 0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
351 cnt = scnprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
352 p += cnt;
353 size -= cnt;
354 }
355 }
356
357 /*
358 * NFS client connect socket upcall.
359 * (Used only during socket connect/search.)
360 */
361 void
362 nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
363 {
364 struct nfs_socket *nso = arg;
365 size_t rcvlen;
366 mbuf_t m;
367 int error = 0, recv = 1;
368
369 if (nso->nso_flags & NSO_CONNECTING) {
370 NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting flags = %8.8x\n", nso, nso->nso_flags);
371 wakeup(nso->nso_wake);
372 return;
373 }
374
375 lck_mtx_lock(&nso->nso_lock);
376 if ((nso->nso_flags & (NSO_UPCALL | NSO_DISCONNECTING | NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
377 NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n", nso);
378 lck_mtx_unlock(&nso->nso_lock);
379 return;
380 }
381 NFS_SOCK_DBG("nfs connect - socket %p upcall %8.8x\n", nso, nso->nso_flags);
382 nso->nso_flags |= NSO_UPCALL;
383
384 /* loop while we make error-free progress */
385 while (!error && recv) {
386 /* make sure we're still interested in this socket */
387 if (nso->nso_flags & (NSO_DISCONNECTING | NSO_DEAD)) {
388 break;
389 }
390 lck_mtx_unlock(&nso->nso_lock);
391 m = NULL;
392 if (nso->nso_sotype == SOCK_STREAM) {
393 error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
394 NFS_SOCK_DBG("nfs_rpc_record_read returned %d recv = %d\n", error, recv);
395 } else {
396 rcvlen = 1000000;
397 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
398 recv = m ? 1 : 0;
399 }
400 lck_mtx_lock(&nso->nso_lock);
401 if (m) {
402 /* match response with request */
403 struct nfsm_chain nmrep;
404 uint32_t reply = 0, rxid = 0, verf_type, verf_len;
405 uint32_t reply_status, rejected_status, accepted_status;
406
407 NFS_SOCK_DUMP_MBUF("Got mbuf from ping", m);
408 nfsm_chain_dissect_init(error, &nmrep, m);
409 nfsm_chain_get_32(error, &nmrep, rxid);
410 nfsm_chain_get_32(error, &nmrep, reply);
411 if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid))) {
412 error = EBADRPC;
413 }
414 nfsm_chain_get_32(error, &nmrep, reply_status);
415 if (!error && (reply_status == RPC_MSGDENIED)) {
416 nfsm_chain_get_32(error, &nmrep, rejected_status);
417 if (!error) {
418 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
419 }
420 }
421 nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
422 nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
423 nfsmout_if(error);
424 if (verf_len) {
425 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
426 }
427 nfsm_chain_get_32(error, &nmrep, accepted_status);
428 nfsmout_if(error);
429 NFS_SOCK_DBG("Recevied accepted_status of %d nso_version = %d\n", accepted_status, nso->nso_version);
430 if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
431 uint32_t minvers, maxvers;
432 nfsm_chain_get_32(error, &nmrep, minvers);
433 nfsm_chain_get_32(error, &nmrep, maxvers);
434 nfsmout_if(error);
435 if (nso->nso_protocol == PMAPPROG) {
436 if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS)) {
437 error = EPROGMISMATCH;
438 } else if ((nso->nso_saddr->sa_family == AF_INET) &&
439 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers)) {
440 nso->nso_version = PMAPVERS;
441 } else if (nso->nso_saddr->sa_family == AF_INET6) {
442 if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers)) {
443 nso->nso_version = RPCBVERS4;
444 } else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers)) {
445 nso->nso_version = RPCBVERS3;
446 }
447 }
448 } else if (nso->nso_protocol == NFS_PROG) {
449 int vers;
450
451 /*
452 * N.B. Both portmapper and rpcbind V3 are happy to return
453 * addresses for other versions than the one you ask (getport or
454 * getaddr) and thus we may have fallen to this code path. So if
455 * we get a version that we support, use highest supported
456 * version. This assumes that the server supports all versions
457 * between minvers and maxvers. Note for IPv6 we will try and
458 * use rpcbind V4 which has getversaddr and we should not get
459 * here if that was successful.
460 */
461 for (vers = nso->nso_nfs_max_vers; vers >= (int)nso->nso_nfs_min_vers; vers--) {
462 if (vers >= (int)minvers && vers <= (int)maxvers) {
463 break;
464 }
465 }
466 nso->nso_version = (vers < (int)nso->nso_nfs_min_vers) ? 0 : vers;
467 }
468 if (!error && nso->nso_version) {
469 accepted_status = RPC_SUCCESS;
470 }
471 }
472 if (!error) {
473 switch (accepted_status) {
474 case RPC_SUCCESS:
475 error = 0;
476 break;
477 case RPC_PROGUNAVAIL:
478 error = EPROGUNAVAIL;
479 break;
480 case RPC_PROGMISMATCH:
481 error = EPROGMISMATCH;
482 break;
483 case RPC_PROCUNAVAIL:
484 error = EPROCUNAVAIL;
485 break;
486 case RPC_GARBAGE:
487 error = EBADRPC;
488 break;
489 case RPC_SYSTEM_ERR:
490 default:
491 error = EIO;
492 break;
493 }
494 }
495 nfsmout:
496 nso->nso_flags &= ~NSO_PINGING;
497 if (error) {
498 NFS_SOCK_DBG("nfs upcalled failed for %d program %d vers error = %d\n",
499 nso->nso_protocol, nso->nso_version, error);
500 nso->nso_error = error;
501 nso->nso_flags |= NSO_DEAD;
502 } else {
503 nso->nso_flags |= NSO_VERIFIED;
504 }
505 mbuf_freem(m);
506 /* wake up search thread */
507 wakeup(nso->nso_wake);
508 break;
509 }
510 }
511
512 nso->nso_flags &= ~NSO_UPCALL;
513 if ((error != EWOULDBLOCK) && (error || !recv)) {
514 /* problems with the socket... */
515 NFS_SOCK_DBG("connect upcall failed %d\n", error);
516 nso->nso_error = error ? error : EPIPE;
517 nso->nso_flags |= NSO_DEAD;
518 wakeup(nso->nso_wake);
519 }
520 if (nso->nso_flags & NSO_DISCONNECTING) {
521 wakeup(&nso->nso_flags);
522 }
523 lck_mtx_unlock(&nso->nso_lock);
524 }
525
526 /*
527 * Create/initialize an nfs_socket structure.
528 */
529 int
530 nfs_socket_create(
531 struct nfsmount *nmp,
532 struct sockaddr *sa,
533 int sotype,
534 in_port_t port,
535 uint32_t protocol,
536 uint32_t vers,
537 int resvport,
538 struct nfs_socket **nsop)
539 {
540 struct nfs_socket *nso;
541 struct timeval now;
542 int error;
543 #define NFS_SOCKET_DEBUGGING
544 #ifdef NFS_SOCKET_DEBUGGING
545 char naddr[sizeof((struct sockaddr_un *)0)->sun_path];
546 void *sinaddr;
547
548 switch (sa->sa_family) {
549 case AF_INET:
550 case AF_INET6:
551 if (sa->sa_family == AF_INET) {
552 sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
553 } else {
554 sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
555 }
556 if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) {
557 strlcpy(naddr, "<unknown>", sizeof(naddr));
558 }
559 break;
560 case AF_LOCAL:
561 strlcpy(naddr, ((struct sockaddr_un *)sa)->sun_path, sizeof(naddr));
562 break;
563 default:
564 strlcpy(naddr, "<unsupported address family>", sizeof(naddr));
565 break;
566 }
567 #else
568 char naddr[1] = { 0 };
569 #endif
570
571 *nsop = NULL;
572
573 /* Create the socket. */
574 MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK | M_ZERO);
575 if (nso) {
576 MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK | M_ZERO);
577 }
578 if (!nso || !nso->nso_saddr) {
579 if (nso) {
580 FREE(nso, M_TEMP);
581 }
582 return ENOMEM;
583 }
584 lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
585 nso->nso_sotype = sotype;
586 if (nso->nso_sotype == SOCK_STREAM) {
587 nfs_rpc_record_state_init(&nso->nso_rrs);
588 }
589 microuptime(&now);
590 nso->nso_timestamp = now.tv_sec;
591 bcopy(sa, nso->nso_saddr, sa->sa_len);
592 switch (sa->sa_family) {
593 case AF_INET:
594 case AF_INET6:
595 if (sa->sa_family == AF_INET) {
596 ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
597 } else if (sa->sa_family == AF_INET6) {
598 ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
599 }
600 break;
601 case AF_LOCAL:
602 break;
603 }
604 nso->nso_protocol = protocol;
605 nso->nso_version = vers;
606 nso->nso_nfs_min_vers = PVER2MAJOR(nmp->nm_min_vers);
607 nso->nso_nfs_max_vers = PVER2MAJOR(nmp->nm_max_vers);
608
609 error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);
610
611 /* Some servers require that the client port be a reserved port number. */
612 if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
613 struct sockaddr_storage ss;
614 int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
615 int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
616 int portrange = IP_PORTRANGE_LOW;
617
618 error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
619 if (!error) { /* bind now to check for failure */
620 ss.ss_len = sa->sa_len;
621 ss.ss_family = sa->sa_family;
622 if (ss.ss_family == AF_INET) {
623 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
624 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
625 } else if (ss.ss_family == AF_INET6) {
626 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
627 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
628 } else {
629 error = EINVAL;
630 }
631 if (!error) {
632 error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
633 }
634 }
635 }
636
637 if (error) {
638 NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
639 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
640 resvport ? "r" : "", port, protocol, vers);
641 nfs_socket_destroy(nso);
642 } else {
643 NFS_SOCK_DBG("nfs connect %s created socket %p <%s> type %d%s port %d prot %d %d\n",
644 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
645 sotype, resvport ? "r" : "", port, protocol, vers);
646 *nsop = nso;
647 }
648 return error;
649 }
650
651 /*
652 * Destroy an nfs_socket structure.
653 */
654 void
655 nfs_socket_destroy(struct nfs_socket *nso)
656 {
657 struct timespec ts = { .tv_sec = 4, .tv_nsec = 0 };
658
659 NFS_SOCK_DBG("Destoring socket %p flags = %8.8x error = %d\n", nso, nso->nso_flags, nso->nso_error);
660 lck_mtx_lock(&nso->nso_lock);
661 nso->nso_flags |= NSO_DISCONNECTING;
662 if (nso->nso_flags & NSO_UPCALL) { /* give upcall a chance to complete */
663 msleep(&nso->nso_flags, &nso->nso_lock, PZERO - 1, "nfswaitupcall", &ts);
664 }
665 lck_mtx_unlock(&nso->nso_lock);
666 sock_shutdown(nso->nso_so, SHUT_RDWR);
667 sock_close(nso->nso_so);
668 if (nso->nso_sotype == SOCK_STREAM) {
669 nfs_rpc_record_state_cleanup(&nso->nso_rrs);
670 }
671 lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
672 if (nso->nso_saddr) {
673 FREE(nso->nso_saddr, M_SONAME);
674 }
675 if (nso->nso_saddr2) {
676 FREE(nso->nso_saddr2, M_SONAME);
677 }
678 NFS_SOCK_DBG("nfs connect - socket %p destroyed\n", nso);
679 FREE(nso, M_TEMP);
680 }
681
682 /*
683 * Set common socket options on an nfs_socket.
684 */
685 void
686 nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
687 {
688 /*
689 * Set socket send/receive timeouts
690 * - Receive timeout shouldn't matter because most receives are performed
691 * in the socket upcall non-blocking.
692 * - Send timeout should allow us to react to a blocked socket.
693 * Soft mounts will want to abort sooner.
694 */
695 struct timeval timeo;
696 int on = 1, proto;
697
698 timeo.tv_usec = 0;
699 timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60;
700 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
701 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
702 if (nso->nso_sotype == SOCK_STREAM) {
703 /* Assume that SOCK_STREAM always requires a connection */
704 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
705 /* set nodelay for TCP */
706 sock_gettype(nso->nso_so, NULL, NULL, &proto);
707 if (proto == IPPROTO_TCP) {
708 sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
709 }
710 }
711 if (nso->nso_sotype == SOCK_DGRAM || nso->nso_saddr->sa_family == AF_LOCAL) { /* set socket buffer sizes for UDP */
712 int reserve = (nso->nso_sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : (2 * 1024 * 1024);
713 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
714 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
715 }
716 /* set SO_NOADDRERR to detect network changes ASAP */
717 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
718 /* just playin' it safe with upcalls */
719 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
720 /* socket should be interruptible if the mount is */
721 if (!NMFLAG(nmp, INTR)) {
722 sock_nointerrupt(nso->nso_so, 1);
723 }
724 }
725
726 /*
727 * Release resources held in an nfs_socket_search.
728 */
729 void
730 nfs_socket_search_cleanup(struct nfs_socket_search *nss)
731 {
732 struct nfs_socket *nso, *nsonext;
733
734 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
735 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
736 nss->nss_sockcnt--;
737 nfs_socket_destroy(nso);
738 }
739 if (nss->nss_sock) {
740 nfs_socket_destroy(nss->nss_sock);
741 nss->nss_sock = NULL;
742 }
743 }
744
745 /*
746 * Prefer returning certain errors over others.
747 * This function returns a ranking of the given error.
748 */
749 int
750 nfs_connect_error_class(int error)
751 {
752 switch (error) {
753 case 0:
754 return 0;
755 case ETIMEDOUT:
756 case EAGAIN:
757 return 1;
758 case EPIPE:
759 case EADDRNOTAVAIL:
760 case ENETDOWN:
761 case ENETUNREACH:
762 case ENETRESET:
763 case ECONNABORTED:
764 case ECONNRESET:
765 case EISCONN:
766 case ENOTCONN:
767 case ESHUTDOWN:
768 case ECONNREFUSED:
769 case EHOSTDOWN:
770 case EHOSTUNREACH:
771 return 2;
772 case ERPCMISMATCH:
773 case EPROCUNAVAIL:
774 case EPROGMISMATCH:
775 case EPROGUNAVAIL:
776 return 3;
777 case EBADRPC:
778 return 4;
779 default:
780 return 5;
781 }
782 }
783
784 /*
785 * Make sure a socket search returns the best error.
786 */
787 void
788 nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
789 {
790 if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error)) {
791 nss->nss_error = error;
792 }
793 }
794
795 /* nfs_connect_search_new_socket:
796 * Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified
797 * by nss.
798 *
799 * nss_last is set to -1 at initialization to indicate the first time. Its set to -2 if address was found but
800 * could not be used or if a socket timed out.
801 */
802 int
803 nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
804 {
805 struct nfs_fs_location *fsl;
806 struct nfs_fs_server *fss;
807 struct sockaddr_storage ss;
808 struct nfs_socket *nso;
809 char *addrstr;
810 int error = 0;
811
812
813 NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n",
814 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt);
815
816 /*
817 * while there are addresses and:
818 * we have no sockets or
819 * the last address failed and did not produce a socket (nss_last < 0) or
820 * Its been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4)
821 * then attempt to create a socket with the current address.
822 */
823 while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
824 ((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) {
825 if (nmp->nm_sockflags & NMSOCK_UNMOUNT) {
826 return EINTR;
827 }
828 /* Can we convert the address to a sockaddr? */
829 fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
830 fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
831 addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
832 NFS_SOCK_DBG("Trying address %s for program %d on port %d\n", addrstr, nss->nss_protocol, nss->nss_port);
833 if (*addrstr == '\0') {
834 /*
835 * We have an unspecified local domain address. We use the program to translate to
836 * a well known local transport address. We only support PMAPROG and NFS for this.
837 */
838 if (nss->nss_protocol == PMAPPROG) {
839 addrstr = (nss->nss_sotype == SOCK_DGRAM) ? RPCB_TICLTS_PATH : RPCB_TICOTSORD_PATH;
840 } else if (nss->nss_protocol == NFS_PROG) {
841 addrstr = nmp->nm_nfs_localport;
842 if (!addrstr || *addrstr == '\0') {
843 addrstr = (nss->nss_sotype == SOCK_DGRAM) ? NFS_TICLTS_PATH : NFS_TICOTSORD_PATH;
844 }
845 }
846 NFS_SOCK_DBG("Calling prog %d with <%s>\n", nss->nss_protocol, addrstr);
847 }
848 if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
849 NFS_SOCK_DBG("Could not convert address %s to socket\n", addrstr);
850 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
851 nss->nss_addrcnt -= 1;
852 nss->nss_last = -2;
853 continue;
854 }
855 /* Check that socket family is acceptable. */
856 if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
857 NFS_SOCK_DBG("Skipping socket family %d, want mount family %d\n", ss.ss_family, nmp->nm_sofamily);
858 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
859 nss->nss_addrcnt -= 1;
860 nss->nss_last = -2;
861 continue;
862 }
863
864 /* Create the socket. */
865 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
866 nss->nss_port, nss->nss_protocol, nss->nss_version,
867 ((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
868 if (error) {
869 return error;
870 }
871
872 nso->nso_location = nss->nss_nextloc;
873 nso->nso_wake = nss;
874 error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
875 if (error) {
876 NFS_SOCK_DBG("sock_setupcall failed for socket %p setting nfs_connect_upcall error = %d\n", nso, error);
877 lck_mtx_lock(&nso->nso_lock);
878 nso->nso_error = error;
879 nso->nso_flags |= NSO_DEAD;
880 lck_mtx_unlock(&nso->nso_lock);
881 }
882
883 TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
884 nss->nss_sockcnt++;
885 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
886 nss->nss_addrcnt -= 1;
887
888 nss->nss_last = now->tv_sec;
889 }
890
891 if (nss->nss_addrcnt == 0 && nss->nss_last < 0) {
892 nss->nss_last = now->tv_sec;
893 }
894
895 return error;
896 }
897
898 /*
899 * nfs_connect_search_socket_connect: Connect an nfs socket nso for nfsmount nmp.
900 * If successful set the socket options for the socket as require from the mount.
901 *
902 * Assumes: nso->nso_lock is held on entry and return.
903 */
904 int
905 nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose)
906 {
907 int error;
908
909 if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
910 /* no connection needed, just say it's already connected */
911 NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n",
912 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
913 nso->nso_flags |= NSO_CONNECTED;
914 nfs_socket_options(nmp, nso);
915 return 1; /* Socket is connected and setup */
916 } else if (!(nso->nso_flags & NSO_CONNECTING)) {
917 /* initiate the connection */
918 nso->nso_flags |= NSO_CONNECTING;
919 lck_mtx_unlock(&nso->nso_lock);
920 NFS_SOCK_DBG("nfs connect %s connecting socket %p %s\n",
921 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso,
922 nso->nso_saddr->sa_family == AF_LOCAL ? ((struct sockaddr_un*)nso->nso_saddr)->sun_path : "");
923 error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
924 if (error) {
925 NFS_SOCK_DBG("nfs connect %s connecting socket %p returned %d\n",
926 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
927 }
928 lck_mtx_lock(&nso->nso_lock);
929 if (error && (error != EINPROGRESS)) {
930 nso->nso_error = error;
931 nso->nso_flags |= NSO_DEAD;
932 return 0;
933 }
934 }
935 if (nso->nso_flags & NSO_CONNECTING) {
936 /* check the connection */
937 if (sock_isconnected(nso->nso_so)) {
938 NFS_SOCK_DBG("nfs connect %s socket %p is connected\n",
939 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
940 nso->nso_flags &= ~NSO_CONNECTING;
941 nso->nso_flags |= NSO_CONNECTED;
942 nfs_socket_options(nmp, nso);
943 return 1; /* Socket is connected and setup */
944 } else {
945 int optlen = sizeof(error);
946 error = 0;
947 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
948 if (error) { /* we got an error on the socket */
949 NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n",
950 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
951 if (verbose) {
952 printf("nfs connect socket error %d for %s\n",
953 error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
954 }
955 nso->nso_error = error;
956 nso->nso_flags |= NSO_DEAD;
957 return 0;
958 }
959 }
960 }
961
962 return 0; /* Waiting to be connected */
963 }
964
965 /*
966 * nfs_connect_search_ping: Send a null proc on the nso socket.
967 */
968 int
969 nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now)
970 {
971 /* initiate a NULL RPC request */
972 uint64_t xid = nso->nso_pingxid;
973 mbuf_t m, mreq = NULL;
974 struct msghdr msg;
975 size_t reqlen, sentlen;
976 uint32_t vers = nso->nso_version;
977 int error;
978
979 if (!vers) {
980 if (nso->nso_protocol == PMAPPROG) {
981 vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
982 } else if (nso->nso_protocol == NFS_PROG) {
983 vers = PVER2MAJOR(nmp->nm_max_vers);
984 }
985 }
986 lck_mtx_unlock(&nso->nso_lock);
987 NFS_SOCK_DBG("Pinging socket %p %d %d %d\n", nso, nso->nso_sotype, nso->nso_protocol, vers);
988 error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
989 vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
990 lck_mtx_lock(&nso->nso_lock);
991 if (!error) {
992 nso->nso_flags |= NSO_PINGING;
993 nso->nso_pingxid = R_XID32(xid);
994 nso->nso_reqtimestamp = now->tv_sec;
995 bzero(&msg, sizeof(msg));
996 if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
997 msg.msg_name = nso->nso_saddr;
998 msg.msg_namelen = nso->nso_saddr->sa_len;
999 }
1000 for (reqlen = 0, m = mreq; m; m = mbuf_next(m)) {
1001 reqlen += mbuf_len(m);
1002 }
1003 lck_mtx_unlock(&nso->nso_lock);
1004 NFS_SOCK_DUMP_MBUF("Sending ping packet", mreq);
1005 error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
1006 NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n",
1007 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
1008 lck_mtx_lock(&nso->nso_lock);
1009 if (!error && (sentlen != reqlen)) {
1010 error = ETIMEDOUT;
1011 }
1012 }
1013 if (error) {
1014 nso->nso_error = error;
1015 nso->nso_flags |= NSO_DEAD;
1016 return 0;
1017 }
1018
1019 return 1;
1020 }
1021
1022 /*
1023 * nfs_connect_search_socket_found: Take the found socket of the socket search list and assign it to the searched socket.
1024 * Set the nfs socket protocol and version if needed.
1025 */
1026 void
1027 nfs_connect_search_socket_found(struct nfsmount *nmp, struct nfs_socket_search *nss, struct nfs_socket *nso)
1028 {
1029 NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
1030 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1031 if (!nso->nso_version) {
1032 /* If the version isn't set, the default must have worked. */
1033 if (nso->nso_protocol == PMAPPROG) {
1034 nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
1035 }
1036 if (nso->nso_protocol == NFS_PROG) {
1037 nso->nso_version = PVER2MAJOR(nmp->nm_max_vers);
1038 }
1039 }
1040 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
1041 nss->nss_sockcnt--;
1042 nss->nss_sock = nso;
1043 }
1044
1045 /*
1046 * nfs_connect_search_socket_reap: For each socket in the search list mark any timed out socket as dead and remove from
1047 * the list. Dead socket are then destroyed.
1048 */
1049 void
1050 nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now)
1051 {
1052 struct nfs_socket *nso, *nsonext;
1053
1054 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
1055 lck_mtx_lock(&nso->nso_lock);
1056 if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
1057 /* took too long */
1058 NFS_SOCK_DBG("nfs connect %s socket %p timed out\n",
1059 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1060 nso->nso_error = ETIMEDOUT;
1061 nso->nso_flags |= NSO_DEAD;
1062 }
1063 if (!(nso->nso_flags & NSO_DEAD)) {
1064 lck_mtx_unlock(&nso->nso_lock);
1065 continue;
1066 }
1067 lck_mtx_unlock(&nso->nso_lock);
1068 NFS_SOCK_DBG("nfs connect %s reaping socket %p error = %d flags = %8.8x\n",
1069 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error, nso->nso_flags);
1070 nfs_socket_search_update_error(nss, nso->nso_error);
1071 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
1072 nss->nss_sockcnt--;
1073 nfs_socket_destroy(nso);
1074 /* If there are more sockets to try, force the starting of another socket */
1075 if (nss->nss_addrcnt > 0) {
1076 nss->nss_last = -2;
1077 }
1078 }
1079 }
1080
1081 /*
1082 * nfs_connect_search_check: Check on the status of search and wait for replies if needed.
1083 */
1084 int
1085 nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
1086 {
1087 int error;
1088
1089 /* log a warning if connect is taking a while */
1090 if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE | NSS_WARNED)) == NSS_VERBOSE)) {
1091 printf("nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1092 nss->nss_flags |= NSS_WARNED;
1093 }
1094 if (nmp->nm_sockflags & NMSOCK_UNMOUNT) {
1095 return EINTR;
1096 }
1097 if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0))) {
1098 return error;
1099 }
1100
1101 /* If we were succesfull at sending a ping, wait up to a second for a reply */
1102 if (nss->nss_last >= 0) {
1103 tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
1104 }
1105
1106 return 0;
1107 }
1108
1109
1110 /*
1111 * Continue the socket search until we have something to report.
1112 */
1113 int
1114 nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
1115 {
1116 struct nfs_socket *nso;
1117 struct timeval now;
1118 int error;
1119 int verbose = (nss->nss_flags & NSS_VERBOSE);
1120
1121 loop:
1122 microuptime(&now);
1123 NFS_SOCK_DBG("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec);
1124
1125 /* add a new socket to the socket list if needed and available */
1126 error = nfs_connect_search_new_socket(nmp, nss, &now);
1127 if (error) {
1128 NFS_SOCK_DBG("nfs connect returned %d\n", error);
1129 return error;
1130 }
1131
1132 /* check each active socket on the list and try to push it along */
1133 TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
1134 lck_mtx_lock(&nso->nso_lock);
1135
1136 /* If not connected connect it */
1137 if (!(nso->nso_flags & NSO_CONNECTED)) {
1138 if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) {
1139 lck_mtx_unlock(&nso->nso_lock);
1140 continue;
1141 }
1142 }
1143
1144 /* If the socket hasn't been verified or in a ping, ping it. We also handle UDP retransmits */
1145 if (!(nso->nso_flags & (NSO_PINGING | NSO_VERIFIED)) ||
1146 ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp + 2))) {
1147 if (!nfs_connect_search_ping(nmp, nso, &now)) {
1148 lck_mtx_unlock(&nso->nso_lock);
1149 continue;
1150 }
1151 }
1152
1153 /* Has the socket been verified by the up call routine? */
1154 if (nso->nso_flags & NSO_VERIFIED) {
1155 /* WOOHOO!! This socket looks good! */
1156 nfs_connect_search_socket_found(nmp, nss, nso);
1157 lck_mtx_unlock(&nso->nso_lock);
1158 break;
1159 }
1160 lck_mtx_unlock(&nso->nso_lock);
1161 }
1162
1163 /* Check for timed out sockets and mark as dead and then remove all dead sockets. */
1164 nfs_connect_search_socket_reap(nmp, nss, &now);
1165
1166 /*
1167 * Keep looping if we haven't found a socket yet and we have more
1168 * sockets to (continue to) try.
1169 */
1170 error = 0;
1171 if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) {
1172 error = nfs_connect_search_check(nmp, nss, &now);
1173 if (!error) {
1174 goto loop;
1175 }
1176 }
1177
1178 NFS_SOCK_DBG("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
1179 return error;
1180 }
1181
1182 /*
1183 * Initialize a new NFS connection.
1184 *
1185 * Search for a location to connect a socket to and initialize the connection.
1186 *
1187 * An NFS mount may have multiple locations/servers/addresses available.
1188 * We attempt to connect to each one asynchronously and will start
1189 * several sockets in parallel if other locations are slow to answer.
1190 * We'll use the first NFS socket we can successfully set up.
1191 *
1192 * The search may involve contacting the portmapper service first.
1193 *
1194 * A mount's initial connection may require negotiating some parameters such
1195 * as socket type and NFS version.
1196 */
1197
1198 int
1199 nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
1200 {
1201 struct nfs_socket_search nss;
1202 struct nfs_socket *nso, *nsonfs;
1203 struct sockaddr_storage ss;
1204 struct sockaddr *saddr, *oldsaddr;
1205 sock_upcall upcall;
1206 #if CONFIG_NFS4
1207 struct timeval now;
1208 #endif
1209 struct timeval start;
1210 int error, savederror, nfsvers;
1211 int tryv4 = 1;
1212 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
1213 fhandle_t *fh = NULL;
1214 char *path = NULL;
1215 in_port_t port;
1216 int addrtotal = 0;
1217
1218 /* paranoia... check that we have at least one address in the locations */
1219 uint32_t loc, serv;
1220 for (loc = 0; loc < nmp->nm_locations.nl_numlocs; loc++) {
1221 for (serv = 0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) {
1222 addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount;
1223 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0) {
1224 NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n",
1225 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1226 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name);
1227 }
1228 }
1229 }
1230
1231 if (addrtotal == 0) {
1232 NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n",
1233 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1234 return EINVAL;
1235 } else {
1236 NFS_SOCK_DBG("nfs connect %s has %d addresses\n",
1237 vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal);
1238 }
1239
1240 lck_mtx_lock(&nmp->nm_lock);
1241 nmp->nm_sockflags |= NMSOCK_CONNECTING;
1242 nmp->nm_nss = &nss;
1243 lck_mtx_unlock(&nmp->nm_lock);
1244 microuptime(&start);
1245 savederror = error = 0;
1246
1247 tryagain:
1248 /* initialize socket search state */
1249 bzero(&nss, sizeof(nss));
1250 nss.nss_addrcnt = addrtotal;
1251 nss.nss_error = savederror;
1252 TAILQ_INIT(&nss.nss_socklist);
1253 nss.nss_sotype = sotype;
1254 nss.nss_startloc = nmp->nm_locations.nl_current;
1255 nss.nss_timestamp = start.tv_sec;
1256 nss.nss_timeo = timeo;
1257 if (verbose) {
1258 nss.nss_flags |= NSS_VERBOSE;
1259 }
1260
1261 /* First time connecting, we may need to negotiate some things */
1262 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1263 NFS_SOCK_DBG("so_family = %d\n", nmp->nm_sofamily);
1264 NFS_SOCK_DBG("nfs port = %d local: <%s>\n", nmp->nm_nfsport, nmp->nm_nfs_localport ? nmp->nm_nfs_localport : "");
1265 NFS_SOCK_DBG("mount port = %d local: <%s>\n", nmp->nm_mountport, nmp->nm_mount_localport ? nmp->nm_mount_localport : "");
1266 if (!nmp->nm_vers) {
1267 /* No NFS version specified... */
1268 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
1269 #if CONFIG_NFS4
1270 if (PVER2MAJOR(nmp->nm_max_vers) >= NFS_VER4 && tryv4) {
1271 nss.nss_port = NFS_PORT;
1272 nss.nss_protocol = NFS_PROG;
1273 nss.nss_version = 4;
1274 nss.nss_flags |= NSS_FALLBACK2PMAP;
1275 } else {
1276 #endif
1277 /* ...connect to portmapper first if we (may) need any ports. */
1278 nss.nss_port = PMAPPORT;
1279 nss.nss_protocol = PMAPPROG;
1280 nss.nss_version = 0;
1281 #if CONFIG_NFS4
1282 }
1283 #endif
1284 } else {
1285 /* ...connect to NFS port first. */
1286 nss.nss_port = nmp->nm_nfsport;
1287 nss.nss_protocol = NFS_PROG;
1288 nss.nss_version = 0;
1289 }
1290 #if CONFIG_NFS4
1291 } else if (nmp->nm_vers >= NFS_VER4) {
1292 if (tryv4) {
1293 /* For NFSv4, we use the given (or default) port. */
1294 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
1295 nss.nss_protocol = NFS_PROG;
1296 nss.nss_version = 4;
1297 /*
1298 * set NSS_FALLBACK2PMAP here to pick up any non standard port
1299 * if no port is specified on the mount;
1300 * Note nm_vers is set so we will only try NFS_VER4.
1301 */
1302 if (!nmp->nm_nfsport) {
1303 nss.nss_flags |= NSS_FALLBACK2PMAP;
1304 }
1305 } else {
1306 nss.nss_port = PMAPPORT;
1307 nss.nss_protocol = PMAPPROG;
1308 nss.nss_version = 0;
1309 }
1310 #endif
1311 } else {
1312 /* For NFSv3/v2... */
1313 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
1314 /* ...connect to portmapper first if we need any ports. */
1315 nss.nss_port = PMAPPORT;
1316 nss.nss_protocol = PMAPPROG;
1317 nss.nss_version = 0;
1318 } else {
1319 /* ...connect to NFS port first. */
1320 nss.nss_port = nmp->nm_nfsport;
1321 nss.nss_protocol = NFS_PROG;
1322 nss.nss_version = nmp->nm_vers;
1323 }
1324 }
1325 NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n",
1326 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1327 nss.nss_protocol, nss.nss_version);
1328 } else {
1329 /* we've connected before, just connect to NFS port */
1330 if (!nmp->nm_nfsport) {
1331 /* need to ask portmapper which port that would be */
1332 nss.nss_port = PMAPPORT;
1333 nss.nss_protocol = PMAPPROG;
1334 nss.nss_version = 0;
1335 } else {
1336 nss.nss_port = nmp->nm_nfsport;
1337 nss.nss_protocol = NFS_PROG;
1338 nss.nss_version = nmp->nm_vers;
1339 }
1340 NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n",
1341 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1342 nss.nss_protocol, nss.nss_version);
1343 }
1344
1345 /* Set next location to first valid location. */
1346 /* If start location is invalid, find next location. */
1347 nss.nss_nextloc = nss.nss_startloc;
1348 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) ||
1349 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) {
1350 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc);
1351 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) {
1352 NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n",
1353 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1354 return ENOENT;
1355 }
1356 }
1357 nss.nss_last = -1;
1358
1359 keepsearching:
1360
1361 error = nfs_connect_search_loop(nmp, &nss);
1362 if (error || !nss.nss_sock) {
1363 /* search failed */
1364 nfs_socket_search_cleanup(&nss);
1365 if (nss.nss_flags & NSS_FALLBACK2PMAP) {
1366 tryv4 = 0;
1367 NFS_SOCK_DBG("nfs connect %s TCP failed for V4 %d %d, trying PORTMAP\n",
1368 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error);
1369 goto tryagain;
1370 }
1371
1372 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
1373 /* Try using UDP */
1374 sotype = SOCK_DGRAM;
1375 savederror = nss.nss_error;
1376 NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n",
1377 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error);
1378 goto tryagain;
1379 }
1380 if (!error) {
1381 error = nss.nss_error ? nss.nss_error : ETIMEDOUT;
1382 }
1383 lck_mtx_lock(&nmp->nm_lock);
1384 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1385 nmp->nm_nss = NULL;
1386 lck_mtx_unlock(&nmp->nm_lock);
1387 if (nss.nss_flags & NSS_WARNED) {
1388 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n",
1389 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1390 }
1391 if (fh) {
1392 FREE(fh, M_TEMP);
1393 }
1394 if (path) {
1395 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1396 }
1397 NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n",
1398 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
1399 return error;
1400 }
1401
1402 /* try to use nss_sock */
1403 nso = nss.nss_sock;
1404 nss.nss_sock = NULL;
1405
1406 /* We may be speaking to portmap first... to determine port(s). */
1407 if (nso->nso_saddr->sa_family == AF_INET) {
1408 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
1409 } else if (nso->nso_saddr->sa_family == AF_INET6) {
1410 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
1411 } else if (nso->nso_saddr->sa_family == AF_LOCAL) {
1412 if (nso->nso_protocol == PMAPPROG) {
1413 port = PMAPPORT;
1414 }
1415 }
1416
1417 if (port == PMAPPORT) {
1418 /* Use this portmapper port to get the port #s we need. */
1419 NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n",
1420 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1421
1422 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */
1423 sock_setupcall(nso->nso_so, NULL, NULL);
1424
1425 /* Set up socket address and port for NFS socket. */
1426 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1427
1428 /* If NFS version not set, try nm_max_vers down to nm_min_vers */
1429 nfsvers = nmp->nm_vers ? nmp->nm_vers : PVER2MAJOR(nmp->nm_max_vers);
1430 if (!(port = nmp->nm_nfsport)) {
1431 if (ss.ss_family == AF_INET) {
1432 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
1433 } else if (ss.ss_family == AF_INET6) {
1434 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
1435 } else if (ss.ss_family == AF_LOCAL) {
1436 if (((struct sockaddr_un*)&ss)->sun_path[0] == '/') {
1437 NFS_SOCK_DBG("Looking up NFS socket over %s\n", ((struct sockaddr_un*)&ss)->sun_path);
1438 }
1439 }
1440 for (; nfsvers >= (int)PVER2MAJOR(nmp->nm_min_vers); nfsvers--) {
1441 if (nmp->nm_vers && nmp->nm_vers != nfsvers) {
1442 continue; /* Wrong version */
1443 }
1444 #if CONFIG_NFS4
1445 if (nfsvers == NFS_VER4 && nso->nso_sotype == SOCK_DGRAM) {
1446 continue; /* NFSv4 does not do UDP */
1447 }
1448 #endif
1449 if (ss.ss_family == AF_LOCAL && nmp->nm_nfs_localport) {
1450 struct sockaddr_un *sun = (struct sockaddr_un *)&ss;
1451 NFS_SOCK_DBG("Using supplied local address %s for NFS_PROG\n", nmp->nm_nfs_localport);
1452 strlcpy(sun->sun_path, nmp->nm_nfs_localport, sizeof(sun->sun_path));
1453 error = 0;
1454 } else {
1455 NFS_SOCK_DBG("Calling Portmap/Rpcbind for NFS_PROG");
1456 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1457 nso->nso_so, NFS_PROG, nfsvers, nso->nso_sotype, timeo);
1458 }
1459 if (!error) {
1460 if (ss.ss_family == AF_INET) {
1461 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1462 } else if (ss.ss_family == AF_INET6) {
1463 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1464 } else if (ss.ss_family == AF_LOCAL) {
1465 port = ((struct sockaddr_un *)&ss)->sun_path[0] ? NFS_PORT : 0;
1466 }
1467 if (!port) {
1468 error = EPROGUNAVAIL;
1469 }
1470 #if CONFIG_NFS4
1471 if (port == NFS_PORT && nfsvers == NFS_VER4 && tryv4 == 0) {
1472 continue; /* We already tried this */
1473 }
1474 #endif
1475 }
1476 if (!error) {
1477 break;
1478 }
1479 }
1480 if (nfsvers < (int)PVER2MAJOR(nmp->nm_min_vers) && error == 0) {
1481 error = EPROGUNAVAIL;
1482 }
1483 if (error) {
1484 nfs_socket_search_update_error(&nss, error);
1485 nfs_socket_destroy(nso);
1486 NFS_SOCK_DBG("Could not lookup NFS socket address for version %d error = %d\n", nfsvers, error);
1487 goto keepsearching;
1488 }
1489 } else if (nmp->nm_nfs_localport) {
1490 strlcpy(((struct sockaddr_un*)&ss)->sun_path, nmp->nm_nfs_localport, sizeof(((struct sockaddr_un*)&ss)->sun_path));
1491 NFS_SOCK_DBG("Using supplied nfs_local_port %s for NFS_PROG\n", nmp->nm_nfs_localport);
1492 }
1493
1494 /* Create NFS protocol socket and add it to the list of sockets. */
1495 /* N.B. If nfsvers is NFS_VER4 at this point then we're on a non standard port */
1496 if (ss.ss_family == AF_LOCAL) {
1497 NFS_SOCK_DBG("Creating NFS socket for %s port = %d\n", ((struct sockaddr_un*)&ss)->sun_path, port);
1498 }
1499 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
1500 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
1501 if (error) {
1502 nfs_socket_search_update_error(&nss, error);
1503 nfs_socket_destroy(nso);
1504 NFS_SOCK_DBG("Could not create NFS socket: %d\n", error);
1505 goto keepsearching;
1506 }
1507 nsonfs->nso_location = nso->nso_location;
1508 nsonfs->nso_wake = &nss;
1509 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs);
1510 if (error) {
1511 nfs_socket_search_update_error(&nss, error);
1512 nfs_socket_destroy(nsonfs);
1513 nfs_socket_destroy(nso);
1514 NFS_SOCK_DBG("Could not nfs_connect_upcall: %d", error);
1515 goto keepsearching;
1516 }
1517 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link);
1518 nss.nss_sockcnt++;
1519 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1520 /* Set up socket address and port for MOUNT socket. */
1521 error = 0;
1522 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1523 port = nmp->nm_mountport;
1524 NFS_SOCK_DBG("mount port = %d\n", port);
1525 if (ss.ss_family == AF_INET) {
1526 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1527 } else if (ss.ss_family == AF_INET6) {
1528 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1529 } else if (ss.ss_family == AF_LOCAL && nmp->nm_mount_localport) {
1530 NFS_SOCK_DBG("Setting mount address to %s port = %d\n", nmp->nm_mount_localport, nmp->nm_mountport);
1531 strlcpy(((struct sockaddr_un*)&ss)->sun_path, nmp->nm_mount_localport, sizeof(((struct sockaddr_un*)&ss)->sun_path));
1532 }
1533 if (!port) {
1534 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1535 /* If NFS version is unknown, optimistically choose for NFSv3. */
1536 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1537 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
1538 NFS_SOCK_DBG("Looking up mount port with socket %p\n", nso->nso_so);
1539 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1540 nso->nso_so, RPCPROG_MNT, mntvers, mntproto == IPPROTO_UDP ? SOCK_DGRAM : SOCK_STREAM, timeo);
1541 }
1542 if (!error) {
1543 if (ss.ss_family == AF_INET) {
1544 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1545 } else if (ss.ss_family == AF_INET6) {
1546 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1547 } else if (ss.ss_family == AF_LOCAL) {
1548 port = (((struct sockaddr_un*)&ss)->sun_path[0] != '\0');
1549 }
1550 if (!port) {
1551 error = EPROGUNAVAIL;
1552 }
1553 }
1554 /* create sockaddr for MOUNT */
1555 if (!error) {
1556 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK | M_ZERO);
1557 }
1558 if (!error && !nsonfs->nso_saddr2) {
1559 error = ENOMEM;
1560 }
1561 if (!error) {
1562 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len);
1563 }
1564 if (error) {
1565 NFS_SOCK_DBG("Could not create mount sockaet address %d", error);
1566 lck_mtx_lock(&nsonfs->nso_lock);
1567 nsonfs->nso_error = error;
1568 nsonfs->nso_flags |= NSO_DEAD;
1569 lck_mtx_unlock(&nsonfs->nso_lock);
1570 }
1571 }
1572 NFS_SOCK_DBG("Destroying socket %p so %p\n", nso, nso->nso_so);
1573 nfs_socket_destroy(nso);
1574 goto keepsearching;
1575 }
1576
1577 /* nso is an NFS socket */
1578 NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1579
1580 /* If NFS version wasn't specified, it was determined during the connect. */
1581 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version;
1582
1583 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
1584 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1585 error = 0;
1586 saddr = nso->nso_saddr2;
1587 if (!saddr) {
1588 /* Need sockaddr for MOUNT port */
1589 NFS_SOCK_DBG("Getting mount address mountport = %d, mount_localport = %s\n", nmp->nm_mountport, nmp->nm_mount_localport);
1590 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1591 port = nmp->nm_mountport;
1592 if (ss.ss_family == AF_INET) {
1593 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1594 } else if (ss.ss_family == AF_INET6) {
1595 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1596 } else if (ss.ss_family == AF_LOCAL && nmp->nm_mount_localport) {
1597 NFS_SOCK_DBG("Setting mount address to %s port = %d\n", nmp->nm_mount_localport, nmp->nm_mountport);
1598 strlcpy(((struct sockaddr_un*)&ss)->sun_path, nmp->nm_mount_localport, sizeof(((struct sockaddr_un*)&ss)->sun_path));
1599 }
1600 if (!port) {
1601 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1602 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1603 int so_type = NM_OMFLAG(nmp, MNTUDP) ? SOCK_DGRAM : nso->nso_sotype;
1604 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1605 NULL, RPCPROG_MNT, mntvers, so_type, timeo);
1606 if (ss.ss_family == AF_INET) {
1607 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1608 } else if (ss.ss_family == AF_INET6) {
1609 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1610 }
1611 }
1612 if (!error) {
1613 if (port) {
1614 saddr = (struct sockaddr*)&ss;
1615 } else {
1616 error = EPROGUNAVAIL;
1617 }
1618 }
1619 }
1620 if (saddr) {
1621 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK | M_ZERO);
1622 }
1623 if (saddr && fh) {
1624 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1625 }
1626 if (!saddr || !fh || !path) {
1627 if (!error) {
1628 error = ENOMEM;
1629 }
1630 if (fh) {
1631 FREE(fh, M_TEMP);
1632 }
1633 if (path) {
1634 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1635 }
1636 fh = NULL;
1637 path = NULL;
1638 nfs_socket_search_update_error(&nss, error);
1639 nfs_socket_destroy(nso);
1640 goto keepsearching;
1641 }
1642 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1);
1643 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers,
1644 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec);
1645 NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n",
1646 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
1647 if (!error) {
1648 /* Make sure we can agree on a security flavor. */
1649 int o, s; /* indices into mount option and server security flavor lists */
1650 int found = 0;
1651
1652 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) {
1653 /* Some servers return an empty list to indicate RPCAUTH_SYS? */
1654 nmp->nm_servsec.count = 1;
1655 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS;
1656 }
1657 if (nmp->nm_sec.count) {
1658 /* Choose the first flavor in our list that the server supports. */
1659 if (!nmp->nm_servsec.count) {
1660 /* we don't know what the server supports, just use our first choice */
1661 nmp->nm_auth = nmp->nm_sec.flavors[0];
1662 found = 1;
1663 }
1664 for (o = 0; !found && (o < nmp->nm_sec.count); o++) {
1665 for (s = 0; !found && (s < nmp->nm_servsec.count); s++) {
1666 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) {
1667 nmp->nm_auth = nmp->nm_sec.flavors[o];
1668 found = 1;
1669 }
1670 }
1671 }
1672 } else {
1673 /* Choose the first one we support from the server's list. */
1674 if (!nmp->nm_servsec.count) {
1675 nmp->nm_auth = RPCAUTH_SYS;
1676 found = 1;
1677 }
1678 for (s = 0; s < nmp->nm_servsec.count; s++) {
1679 switch (nmp->nm_servsec.flavors[s]) {
1680 case RPCAUTH_SYS:
1681 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */
1682 if (found && (nmp->nm_auth == RPCAUTH_NONE)) {
1683 found = 0;
1684 }
1685 case RPCAUTH_NONE:
1686 case RPCAUTH_KRB5:
1687 case RPCAUTH_KRB5I:
1688 case RPCAUTH_KRB5P:
1689 if (!found) {
1690 nmp->nm_auth = nmp->nm_servsec.flavors[s];
1691 found = 1;
1692 }
1693 break;
1694 }
1695 }
1696 }
1697 error = !found ? EAUTH : 0;
1698 }
1699 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1700 path = NULL;
1701 if (error) {
1702 nfs_socket_search_update_error(&nss, error);
1703 FREE(fh, M_TEMP);
1704 fh = NULL;
1705 nfs_socket_destroy(nso);
1706 goto keepsearching;
1707 }
1708 if (nmp->nm_fh) {
1709 FREE(nmp->nm_fh, M_TEMP);
1710 }
1711 nmp->nm_fh = fh;
1712 fh = NULL;
1713 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
1714 }
1715
1716 /* put the real upcall in place */
1717 upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
1718 error = sock_setupcall(nso->nso_so, upcall, nmp);
1719 if (error) {
1720 nfs_socket_search_update_error(&nss, error);
1721 nfs_socket_destroy(nso);
1722 goto keepsearching;
1723 }
1724
1725 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1726 /* set mntfromname to this location */
1727 if (!NM_OMATTR_GIVEN(nmp, MNTFROM)) {
1728 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location,
1729 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1730 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0);
1731 }
1732 /* some negotiated values need to remain unchanged for the life of the mount */
1733 if (!nmp->nm_sotype) {
1734 nmp->nm_sotype = nso->nso_sotype;
1735 }
1736 if (!nmp->nm_vers) {
1737 nmp->nm_vers = nfsvers;
1738 #if CONFIG_NFS4
1739 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
1740 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
1741 if (nso->nso_saddr->sa_family == AF_INET) {
1742 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
1743 } else if (nso->nso_saddr->sa_family == AF_INET6) {
1744 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
1745 } else {
1746 port = 0;
1747 }
1748 if (port == NFS_PORT) {
1749 nmp->nm_nfsport = NFS_PORT;
1750 }
1751 }
1752 #endif
1753 }
1754 #if CONFIG_NFS4
1755 /* do some version-specific pre-mount set up */
1756 if (nmp->nm_vers >= NFS_VER4) {
1757 microtime(&now);
1758 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
1759 if (!NMFLAG(nmp, NOCALLBACK)) {
1760 nfs4_mount_callback_setup(nmp);
1761 }
1762 }
1763 #endif
1764 }
1765
1766 /* Initialize NFS socket state variables */
1767 lck_mtx_lock(&nmp->nm_lock);
1768 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
1769 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
1770 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
1771 nmp->nm_sdrtt[3] = 0;
1772 if (nso->nso_sotype == SOCK_DGRAM) {
1773 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
1774 nmp->nm_sent = 0;
1775 } else if (nso->nso_sotype == SOCK_STREAM) {
1776 nmp->nm_timeouts = 0;
1777 }
1778 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1779 nmp->nm_sockflags |= NMSOCK_SETUP;
1780 /* move the socket to the mount structure */
1781 nmp->nm_nso = nso;
1782 oldsaddr = nmp->nm_saddr;
1783 nmp->nm_saddr = nso->nso_saddr;
1784 lck_mtx_unlock(&nmp->nm_lock);
1785 error = nfs_connect_setup(nmp);
1786 lck_mtx_lock(&nmp->nm_lock);
1787 nmp->nm_sockflags &= ~NMSOCK_SETUP;
1788 if (!error) {
1789 nmp->nm_sockflags |= NMSOCK_READY;
1790 wakeup(&nmp->nm_sockflags);
1791 }
1792 if (error) {
1793 NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n",
1794 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
1795 nfs_socket_search_update_error(&nss, error);
1796 nmp->nm_saddr = oldsaddr;
1797 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1798 /* undo settings made prior to setup */
1799 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE)) {
1800 nmp->nm_sotype = 0;
1801 }
1802 #if CONFIG_NFS4
1803 if (nmp->nm_vers >= NFS_VER4) {
1804 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
1805 nmp->nm_nfsport = 0;
1806 }
1807 if (nmp->nm_cbid) {
1808 nfs4_mount_callback_shutdown(nmp);
1809 }
1810 if (IS_VALID_CRED(nmp->nm_mcred)) {
1811 kauth_cred_unref(&nmp->nm_mcred);
1812 }
1813 bzero(&nmp->nm_un, sizeof(nmp->nm_un));
1814 }
1815 #endif
1816 nmp->nm_vers = 0;
1817 }
1818 lck_mtx_unlock(&nmp->nm_lock);
1819 nmp->nm_nso = NULL;
1820 nfs_socket_destroy(nso);
1821 goto keepsearching;
1822 }
1823
1824 /* update current location */
1825 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) &&
1826 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) {
1827 /* server has changed, we should initiate failover/recovery */
1828 // XXX
1829 }
1830 nmp->nm_locations.nl_current = nso->nso_location;
1831 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID;
1832
1833 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1834 /* We have now successfully connected... make a note of it. */
1835 nmp->nm_sockflags |= NMSOCK_HASCONNECTED;
1836 }
1837
1838 lck_mtx_unlock(&nmp->nm_lock);
1839 if (oldsaddr) {
1840 FREE(oldsaddr, M_SONAME);
1841 }
1842
1843 if (nss.nss_flags & NSS_WARNED) {
1844 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n",
1845 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1846 }
1847
1848 nmp->nm_nss = NULL;
1849 nfs_socket_search_cleanup(&nss);
1850 if (fh) {
1851 FREE(fh, M_TEMP);
1852 }
1853 if (path) {
1854 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1855 }
1856 NFS_SOCK_DBG("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1857 return 0;
1858 }
1859
1860
1861 /* setup & confirm socket connection is functional */
1862 int
1863 nfs_connect_setup(
1864 #if !CONFIG_NFS4
1865 __unused
1866 #endif
1867 struct nfsmount *nmp)
1868 {
1869 int error = 0;
1870 #if CONFIG_NFS4
1871 if (nmp->nm_vers >= NFS_VER4) {
1872 if (nmp->nm_state & NFSSTA_CLIENTID) {
1873 /* first, try to renew our current state */
1874 error = nfs4_renew(nmp, R_SETUP);
1875 if ((error == NFSERR_ADMIN_REVOKED) ||
1876 (error == NFSERR_CB_PATH_DOWN) ||
1877 (error == NFSERR_EXPIRED) ||
1878 (error == NFSERR_LEASE_MOVED) ||
1879 (error == NFSERR_STALE_CLIENTID)) {
1880 lck_mtx_lock(&nmp->nm_lock);
1881 nfs_need_recover(nmp, error);
1882 lck_mtx_unlock(&nmp->nm_lock);
1883 }
1884 }
1885 error = nfs4_setclientid(nmp);
1886 }
1887 #endif
1888 return error;
1889 }
1890
1891 /*
1892 * NFS socket reconnect routine:
1893 * Called when a connection is broken.
1894 * - disconnect the old socket
1895 * - nfs_connect() again
1896 * - set R_MUSTRESEND for all outstanding requests on mount point
1897 * If this fails the mount point is DEAD!
1898 */
1899 int
1900 nfs_reconnect(struct nfsmount *nmp)
1901 {
1902 struct nfsreq *rq;
1903 struct timeval now;
1904 thread_t thd = current_thread();
1905 int error, wentdown = 0, verbose = 1;
1906 time_t lastmsg;
1907 int timeo;
1908
1909 microuptime(&now);
1910 lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);
1911
1912 nfs_disconnect(nmp);
1913
1914
1915 lck_mtx_lock(&nmp->nm_lock);
1916 timeo = nfs_is_squishy(nmp) ? 8 : 30;
1917 lck_mtx_unlock(&nmp->nm_lock);
1918
1919 while ((error = nfs_connect(nmp, verbose, timeo))) {
1920 verbose = 0;
1921 nfs_disconnect(nmp);
1922 if ((error == EINTR) || (error == ERESTART)) {
1923 return EINTR;
1924 }
1925 if (error == EIO) {
1926 return EIO;
1927 }
1928 microuptime(&now);
1929 if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
1930 lastmsg = now.tv_sec;
1931 nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect", 0);
1932 wentdown = 1;
1933 }
1934 lck_mtx_lock(&nmp->nm_lock);
1935 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
1936 /* we're not yet completely mounted and */
1937 /* we can't reconnect, so we fail */
1938 lck_mtx_unlock(&nmp->nm_lock);
1939 NFS_SOCK_DBG("Not mounted returning %d\n", error);
1940 return error;
1941 }
1942
1943 if (nfs_mount_check_dead_timeout(nmp)) {
1944 nfs_mount_make_zombie(nmp);
1945 lck_mtx_unlock(&nmp->nm_lock);
1946 return ENXIO;
1947 }
1948
1949 if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
1950 lck_mtx_unlock(&nmp->nm_lock);
1951 return error;
1952 }
1953 lck_mtx_unlock(&nmp->nm_lock);
1954 tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay", 2 * hz);
1955 if ((error = nfs_sigintr(nmp, NULL, thd, 0))) {
1956 return error;
1957 }
1958 }
1959
1960 if (wentdown) {
1961 nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");
1962 }
1963
1964 /*
1965 * Loop through outstanding request list and mark all requests
1966 * as needing a resend. (Though nfs_need_reconnect() probably
1967 * marked them all already.)
1968 */
1969 lck_mtx_lock(nfs_request_mutex);
1970 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1971 if (rq->r_nmp == nmp) {
1972 lck_mtx_lock(&rq->r_mtx);
1973 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1974 rq->r_flags |= R_MUSTRESEND;
1975 rq->r_rtt = -1;
1976 wakeup(rq);
1977 if ((rq->r_flags & (R_IOD | R_ASYNC | R_ASYNCWAIT | R_SENDING)) == R_ASYNC) {
1978 nfs_asyncio_resend(rq);
1979 }
1980 }
1981 lck_mtx_unlock(&rq->r_mtx);
1982 }
1983 }
1984 lck_mtx_unlock(nfs_request_mutex);
1985 return 0;
1986 }
1987
1988 /*
1989 * NFS disconnect. Clean up and unlink.
1990 */
1991 void
1992 nfs_disconnect(struct nfsmount *nmp)
1993 {
1994 struct nfs_socket *nso;
1995
1996 lck_mtx_lock(&nmp->nm_lock);
1997 tryagain:
1998 if (nmp->nm_nso) {
1999 struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
2000 if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
2001 nmp->nm_state |= NFSSTA_WANTSND;
2002 msleep(&nmp->nm_state, &nmp->nm_lock, PZERO - 1, "nfswaitsending", &ts);
2003 goto tryagain;
2004 }
2005 if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
2006 msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO - 1, "nfswaitpoke", &ts);
2007 goto tryagain;
2008 }
2009 nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
2010 nmp->nm_sockflags &= ~NMSOCK_READY;
2011 nso = nmp->nm_nso;
2012 nmp->nm_nso = NULL;
2013 if (nso->nso_saddr == nmp->nm_saddr) {
2014 nso->nso_saddr = NULL;
2015 }
2016 lck_mtx_unlock(&nmp->nm_lock);
2017 nfs_socket_destroy(nso);
2018 lck_mtx_lock(&nmp->nm_lock);
2019 nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
2020 lck_mtx_unlock(&nmp->nm_lock);
2021 } else {
2022 lck_mtx_unlock(&nmp->nm_lock);
2023 }
2024 }
2025
2026 /*
2027 * mark an NFS mount as needing a reconnect/resends.
2028 */
2029 void
2030 nfs_need_reconnect(struct nfsmount *nmp)
2031 {
2032 struct nfsreq *rq;
2033
2034 lck_mtx_lock(&nmp->nm_lock);
2035 nmp->nm_sockflags &= ~(NMSOCK_READY | NMSOCK_SETUP);
2036 lck_mtx_unlock(&nmp->nm_lock);
2037
2038 /*
2039 * Loop through outstanding request list and
2040 * mark all requests as needing a resend.
2041 */
2042 lck_mtx_lock(nfs_request_mutex);
2043 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
2044 if (rq->r_nmp == nmp) {
2045 lck_mtx_lock(&rq->r_mtx);
2046 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
2047 rq->r_flags |= R_MUSTRESEND;
2048 rq->r_rtt = -1;
2049 wakeup(rq);
2050 if ((rq->r_flags & (R_IOD | R_ASYNC | R_ASYNCWAIT | R_SENDING)) == R_ASYNC) {
2051 nfs_asyncio_resend(rq);
2052 }
2053 }
2054 lck_mtx_unlock(&rq->r_mtx);
2055 }
2056 }
2057 lck_mtx_unlock(nfs_request_mutex);
2058 }
2059
2060
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 *
 * arg is the struct nfsmount the thread serves.
 *
 * The thread keeps looping while any of the following is true: the socket
 * is not ready, the resend queue is non-empty, there are monitored nodes,
 * the dead timeout is running, state recovery is pending, or (NFSv4) there
 * are delegations waiting to be returned.  Each pass, in order:
 *   1. reconnect the socket if it is not ready
 *   2. run state recovery if it is pending
 *   3. return queued NFSv4 delegations
 *   4. resend (or fail) requests on the resend queue
 *   5. check the dead timeout
 *   6. scan monitored nodes
 *   7. sleep for a while
 *
 * nm_lock is held at the top of every step and dropped around calls that
 * do the actual work.  On exit the thread optionally sends the NFSv3
 * unmount RPC, clears nm_sockthd if it still refers to this thread, wakes
 * anyone sleeping on &nm_sockthd, and terminates itself.
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { .tv_sec = 30, .tv_nsec = 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish;
	nfsnode_t np;
	int do_reconnect_sleep = 0; /* count of consecutive EIO/EINTR reconnect failures */

	lck_mtx_lock(&nmp->nm_lock);
	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	    !TAILQ_EMPTY(&nmp->nm_resendq) ||
	    !LIST_EMPTY(&nmp->nm_monlist) ||
	    nmp->nm_deadto_start ||
	    (nmp->nm_state & NFSSTA_RECOVER) ||
	    ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq))) {
		/* an unmount request ends the work loop immediately */
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT) {
			break;
		}
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))) {
			/* note when this series of reconnect attempts began */
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/*
			 * XXX We don't want to call reconnect again right away if returned errors
			 * before that may not have blocked. This has caused spamming null procs
			 * from machines in the past.
			 */
			if (do_reconnect_sleep) {
				tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz);
			}
			error = nfs_reconnect(nmp);
			if (error) {
				int lvl = 7;
				/*
				 * For EIO/EINTR: count the failure (which also enables the
				 * delay above) and log at level 0 only on every 600th
				 * such failure, at level 7 otherwise.
				 */
				if (error == EIO || error == EINTR) {
					lvl = (do_reconnect_sleep++ % 600) ? 7 : 0;
				}
				NFS_DBG(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n",
				    vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
			} else {
				/* reconnected: reset the attempt start time and the failure count */
				nmp->nm_reconnect_start = 0;
				do_reconnect_sleep = 0;
			}
			lck_mtx_lock(&nmp->nm_lock);
		}
		if ((nmp->nm_sockflags & NMSOCK_READY) &&
		    (nmp->nm_state & NFSSTA_RECOVER) &&
		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
		    !(nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))) {
			/* perform state recovery */
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_recover(nmp);
			lck_mtx_lock(&nmp->nm_lock);
		}
#if CONFIG_NFS4
		/* handle NFSv4 delegation returns */
		while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) &&
		    (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
		    ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
			lck_mtx_unlock(&nmp->nm_lock);
			nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
			lck_mtx_lock(&nmp->nm_lock);
		}
#endif
		/*
		 * do resends, if necessary/possible
		 * (possible when the socket is ready and no recovery is pending;
		 * the queue is also drained when the mount is forced/dead)
		 */
		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) ||
		    (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))) &&
		    ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime) {
				microuptime(&now);
			}
			/* skip over requests whose resend time has not arrived yet */
			while (req && !(nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) {
				req = TAILQ_NEXT(req, r_rchain);
			}
			if (!req) {
				break;
			}
			/* take the request off the resend queue and mark it as on no list */
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			/* Note that we have a reference on the request that was taken nfs_asyncio_resend */
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				/* request already has an error or a reply: nothing to resend */
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish) {
					nfs_asyncio_finish(req);
				}
				nfs_request_rele(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
				/* the request has to be rebuilt and sent from scratch */
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
#if CONFIG_NFS_GSS
				if (nfs_request_using_gss(req)) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH) {
						req->r_xid = 0;
					}
				}
#endif /* CONFIG_NFS_GSS */
				NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
				    nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
				    req->r_flags, req->r_rtt);
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error) {
					error = nfs_request_add_header(req);
				}
				if (!error) {
					error = nfs_request_send(req, 0);
				}
				lck_mtx_lock(&req->r_mtx);
				if (req->r_flags & R_RESENDQ) {
					req->r_flags &= ~R_RESENDQ;
				}
				if (error) {
					req->r_error = error;
				}
				wakeup(req);
				/* only finish the async request here if the restart failed */
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish) {
					nfs_asyncio_finish(req);
				}
				nfs_request_rele(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
			    req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
			error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				/* plain resend of the already-built request */
				req->r_flags |= R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_flags & R_RESENDQ) {
						req->r_flags &= ~R_RESENDQ;
					}
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					nfs_request_rele(req);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			/* interrupted or the send failed: record the error on the request */
			req->r_error = error;
			if (req->r_flags & R_RESENDQ) {
				req->r_flags &= ~R_RESENDQ;
			}
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish) {
				nfs_asyncio_finish(req);
			}
			nfs_request_rele(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* give up on the mount if it has been unresponsive for too long */
		if (nfs_mount_check_dead_timeout(nmp)) {
			nfs_mount_make_zombie(nmp);
			break;
		}

		if (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) {
			break;
		}
		/* check monitored nodes, if necessary/possible */
		if (!LIST_EMPTY(&nmp->nm_monlist)) {
			nmp->nm_state |= NFSSTA_MONITOR_SCAN;
			LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
				if (!(nmp->nm_sockflags & NMSOCK_READY) ||
				    (nmp->nm_state & (NFSSTA_RECOVER | NFSSTA_UNMOUNTING | NFSSTA_FORCE | NFSSTA_DEAD))) {
					break;
				}
				/* flag the node as being scanned while the lock is dropped */
				np->n_mflag |= NMMONSCANINPROG;
				lck_mtx_unlock(&nmp->nm_lock);
				error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED | NGA_MONITOR));
				if (!error && ISSET(np->n_flag, NUPDATESIZE)) { /* update quickly to avoid multiple events */
					nfs_data_update_size(np, 0);
				}
				lck_mtx_lock(&nmp->nm_lock);
				np->n_mflag &= ~NMMONSCANINPROG;
				/* wake anyone waiting for this node's scan to finish */
				if (np->n_mflag & NMMONSCANWANT) {
					np->n_mflag &= ~NMMONSCANWANT;
					wakeup(&np->n_mflag);
				}
				if (error || !(nmp->nm_sockflags & NMSOCK_READY) ||
				    (nmp->nm_state & (NFSSTA_RECOVER | NFSSTA_UNMOUNTING | NFSSTA_FORCE | NFSSTA_DEAD))) {
					break;
				}
			}
			nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
			if (nmp->nm_state & NFSSTA_UNMOUNTING) {
				wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
			}
		}
		/*
		 * Sleep until woken or the timeout expires: 1 second if the dead
		 * timeout is running, resends are queued, or recovery is pending;
		 * 5 seconds otherwise.  No sleep when the socket is not ready
		 * (and we are neither recovering nor unmounting).
		 */
		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER | NFSSTA_UNMOUNTING))) {
			if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
			    (nmp->nm_state & NFSSTA_RECOVER)) {
				ts.tv_sec = 1;
			} else {
				ts.tv_sec = 5;
			}
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	/* If we're unmounting, send the unmount RPC, if requested/appropriate. */
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
	    (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
	    (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))) {
		lck_mtx_unlock(&nmp->nm_lock);
		nfs3_umount_rpc(nmp, vfs_context_kernel(),
		    (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* we're done: clear the mount's thread pointer if it is still ours */
	if (nmp->nm_sockthd == thd) {
		nmp->nm_sockthd = NULL;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
2306
2307 /* start or wake a mount's socket thread */
2308 void
2309 nfs_mount_sock_thread_wake(struct nfsmount *nmp)
2310 {
2311 if (nmp->nm_sockthd) {
2312 wakeup(&nmp->nm_sockthd);
2313 } else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS) {
2314 thread_deallocate(nmp->nm_sockthd);
2315 }
2316 }
2317
2318 /*
2319 * Check if we should mark the mount dead because the
2320 * unresponsive mount has reached the dead timeout.
2321 * (must be called with nmp locked)
2322 */
2323 int
2324 nfs_mount_check_dead_timeout(struct nfsmount *nmp)
2325 {
2326 struct timeval now;
2327
2328 if (nmp->nm_state & NFSSTA_DEAD) {
2329 return 1;
2330 }
2331 if (nmp->nm_deadto_start == 0) {
2332 return 0;
2333 }
2334 nfs_is_squishy(nmp);
2335 if (nmp->nm_curdeadtimeout <= 0) {
2336 return 0;
2337 }
2338 microuptime(&now);
2339 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) {
2340 return 0;
2341 }
2342 return 1;
2343 }
2344
2345 /*
2346 * Call nfs_mount_zombie to remove most of the
2347 * nfs state for the mount, and then ask to be forcibly unmounted.
2348 *
2349 * Assumes the nfs mount structure lock nm_lock is held.
2350 */
2351
2352 void
2353 nfs_mount_make_zombie(struct nfsmount *nmp)
2354 {
2355 fsid_t fsid;
2356
2357 if (!nmp) {
2358 return;
2359 }
2360
2361 if (nmp->nm_state & NFSSTA_DEAD) {
2362 return;
2363 }
2364
2365 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
2366 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
2367 fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
2368 lck_mtx_unlock(&nmp->nm_lock);
2369 nfs_mount_zombie(nmp, NFSSTA_DEAD);
2370 vfs_event_signal(&fsid, VQ_DEAD, 0);
2371 lck_mtx_lock(&nmp->nm_lock);
2372 }
2373
2374
/*
 * NFS callback channel socket state
 *
 * One of these is allocated for each connection accepted on the callback
 * listening sockets (see nfs4_cb_accept()) and linked onto nfs4_cb_socks.
 */
struct nfs_callback_socket {
	TAILQ_ENTRY(nfs_callback_socket) ncbs_link; /* link on the nfs4_cb_socks list */
	socket_t                        ncbs_so;        /* the socket */
	struct sockaddr_storage         ncbs_saddr;     /* socket address */
	struct nfs_rpc_record_state     ncbs_rrs;       /* RPC record parsing state */
	time_t                          ncbs_stamp;     /* last accessed at */
	uint32_t                        ncbs_flags;     /* see below */
};
/* values for ncbs_flags */
#define NCBSOCK_UPCALL          0x0001  /* a receive upcall is in progress on this socket */
#define NCBSOCK_UPCALLWANT      0x0002  /* someone is waiting for the in-progress upcall to finish */
#define NCBSOCK_DEAD            0x0004  /* socket is unwanted/closed; the cleanup timer will reap it */
2389
#if CONFIG_NFS4
/*
 * NFS callback channel state
 *
 * One listening socket for accepting socket connections from servers and
 * a list of connected sockets to handle callback requests on.
 * Mounts registered with the callback channel are assigned IDs and
 * put on a list so that the callback request handling code can match
 * the requests up with mounts.
 */
socket_t nfs4_cb_so = NULL;             /* IPv4 listening socket */
socket_t nfs4_cb_so6 = NULL;            /* IPv6 listening socket */
in_port_t nfs4_cb_port = 0;             /* port the IPv4 listening socket is bound to */
in_port_t nfs4_cb_port6 = 0;            /* port the IPv6 listening socket is bound to */
uint32_t nfs4_cb_id = 0;                /* next callback ID to hand out (0 = channel state not yet initialized) */
uint32_t nfs4_cb_so_usecount = 0;       /* number of mounts currently registered with the callback channel */
TAILQ_HEAD(nfs4_cb_sock_list, nfs_callback_socket) nfs4_cb_socks;   /* accepted callback sockets */
TAILQ_HEAD(nfs4_cb_mount_list, nfsmount) nfs4_cb_mounts;            /* mounts registered for callbacks */

/* handles one callback RPC request received on a callback socket */
int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
2410
2411 /*
2412 * Set up the callback channel for the NFS mount.
2413 *
2414 * Initializes the callback channel socket state and
2415 * assigns a callback ID to the mount.
2416 */
2417 void
2418 nfs4_mount_callback_setup(struct nfsmount *nmp)
2419 {
2420 struct sockaddr_in sin;
2421 struct sockaddr_in6 sin6;
2422 socket_t so = NULL;
2423 socket_t so6 = NULL;
2424 struct timeval timeo;
2425 int error, on = 1;
2426 in_port_t port;
2427
2428 lck_mtx_lock(nfs_global_mutex);
2429 if (nfs4_cb_id == 0) {
2430 TAILQ_INIT(&nfs4_cb_mounts);
2431 TAILQ_INIT(&nfs4_cb_socks);
2432 nfs4_cb_id++;
2433 }
2434 nmp->nm_cbid = nfs4_cb_id++;
2435 if (nmp->nm_cbid == 0) {
2436 nmp->nm_cbid = nfs4_cb_id++;
2437 }
2438 nfs4_cb_so_usecount++;
2439 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
2440
2441 if (nfs4_cb_so) {
2442 lck_mtx_unlock(nfs_global_mutex);
2443 return;
2444 }
2445
2446 /* IPv4 */
2447 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
2448 if (error) {
2449 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
2450 goto fail;
2451 }
2452 so = nfs4_cb_so;
2453
2454 sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2455 sin.sin_len = sizeof(struct sockaddr_in);
2456 sin.sin_family = AF_INET;
2457 sin.sin_addr.s_addr = htonl(INADDR_ANY);
2458 sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
2459 error = sock_bind(so, (struct sockaddr *)&sin);
2460 if (error) {
2461 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
2462 goto fail;
2463 }
2464 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
2465 if (error) {
2466 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
2467 goto fail;
2468 }
2469 nfs4_cb_port = ntohs(sin.sin_port);
2470
2471 error = sock_listen(so, 32);
2472 if (error) {
2473 log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
2474 goto fail;
2475 }
2476
2477 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2478 timeo.tv_usec = 0;
2479 timeo.tv_sec = 60;
2480 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2481 if (error) {
2482 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
2483 }
2484 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2485 if (error) {
2486 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
2487 }
2488 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2489 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2490 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2491 error = 0;
2492
2493 /* IPv6 */
2494 error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
2495 if (error) {
2496 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
2497 goto fail;
2498 }
2499 so6 = nfs4_cb_so6;
2500
2501 sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2502 sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
2503 /* try to use specified port or same port as IPv4 */
2504 port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
2505 ipv6_bind_again:
2506 sin6.sin6_len = sizeof(struct sockaddr_in6);
2507 sin6.sin6_family = AF_INET6;
2508 sin6.sin6_addr = in6addr_any;
2509 sin6.sin6_port = htons(port);
2510 error = sock_bind(so6, (struct sockaddr *)&sin6);
2511 if (error) {
2512 if (port != nfs_callback_port) {
2513 /* if we simply tried to match the IPv4 port, then try any port */
2514 port = 0;
2515 goto ipv6_bind_again;
2516 }
2517 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
2518 goto fail;
2519 }
2520 error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
2521 if (error) {
2522 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
2523 goto fail;
2524 }
2525 nfs4_cb_port6 = ntohs(sin6.sin6_port);
2526
2527 error = sock_listen(so6, 32);
2528 if (error) {
2529 log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
2530 goto fail;
2531 }
2532
2533 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2534 timeo.tv_usec = 0;
2535 timeo.tv_sec = 60;
2536 error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2537 if (error) {
2538 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
2539 }
2540 error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2541 if (error) {
2542 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
2543 }
2544 sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2545 sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2546 sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2547 error = 0;
2548
2549 fail:
2550 if (error) {
2551 nfs4_cb_so = nfs4_cb_so6 = NULL;
2552 lck_mtx_unlock(nfs_global_mutex);
2553 if (so) {
2554 sock_shutdown(so, SHUT_RDWR);
2555 sock_close(so);
2556 }
2557 if (so6) {
2558 sock_shutdown(so6, SHUT_RDWR);
2559 sock_close(so6);
2560 }
2561 } else {
2562 lck_mtx_unlock(nfs_global_mutex);
2563 }
2564 }
2565
2566 /*
2567 * Shut down the callback channel for the NFS mount.
2568 *
2569 * Clears the mount's callback ID and releases the mounts
2570 * reference on the callback socket. Last reference dropped
2571 * will also shut down the callback socket(s).
2572 */
2573 void
2574 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
2575 {
2576 struct nfs_callback_socket *ncbsp;
2577 socket_t so, so6;
2578 struct nfs4_cb_sock_list cb_socks;
2579 struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
2580
2581 lck_mtx_lock(nfs_global_mutex);
2582 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
2583 /* wait for any callbacks in progress to complete */
2584 while (nmp->nm_cbrefs) {
2585 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
2586 }
2587 nmp->nm_cbid = 0;
2588 if (--nfs4_cb_so_usecount) {
2589 lck_mtx_unlock(nfs_global_mutex);
2590 return;
2591 }
2592 so = nfs4_cb_so;
2593 so6 = nfs4_cb_so6;
2594 nfs4_cb_so = nfs4_cb_so6 = NULL;
2595 TAILQ_INIT(&cb_socks);
2596 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
2597 lck_mtx_unlock(nfs_global_mutex);
2598 if (so) {
2599 sock_shutdown(so, SHUT_RDWR);
2600 sock_close(so);
2601 }
2602 if (so6) {
2603 sock_shutdown(so6, SHUT_RDWR);
2604 sock_close(so6);
2605 }
2606 while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
2607 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
2608 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2609 sock_close(ncbsp->ncbs_so);
2610 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2611 FREE(ncbsp, M_TEMP);
2612 }
2613 }
2614
2615 /*
2616 * Check periodically for stale/unused nfs callback sockets
2617 */
2618 #define NFS4_CB_TIMER_PERIOD 30
2619 #define NFS4_CB_IDLE_MAX 300
2620 void
2621 nfs4_callback_timer(__unused void *param0, __unused void *param1)
2622 {
2623 struct nfs_callback_socket *ncbsp, *nextncbsp;
2624 struct timeval now;
2625
2626 loop:
2627 lck_mtx_lock(nfs_global_mutex);
2628 if (TAILQ_EMPTY(&nfs4_cb_socks)) {
2629 nfs4_callback_timer_on = 0;
2630 lck_mtx_unlock(nfs_global_mutex);
2631 return;
2632 }
2633 microuptime(&now);
2634 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
2635 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
2636 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX))) {
2637 continue;
2638 }
2639 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
2640 lck_mtx_unlock(nfs_global_mutex);
2641 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2642 sock_close(ncbsp->ncbs_so);
2643 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2644 FREE(ncbsp, M_TEMP);
2645 goto loop;
2646 }
2647 nfs4_callback_timer_on = 1;
2648 nfs_interval_timer_start(nfs4_callback_timer_call,
2649 NFS4_CB_TIMER_PERIOD * 1000);
2650 lck_mtx_unlock(nfs_global_mutex);
2651 }
2652
2653 /*
2654 * Accept a new callback socket.
2655 */
2656 void
2657 nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
2658 {
2659 socket_t newso = NULL;
2660 struct nfs_callback_socket *ncbsp;
2661 struct nfsmount *nmp;
2662 struct timeval timeo, now;
2663 int error, on = 1, ip;
2664
2665 if (so == nfs4_cb_so) {
2666 ip = 4;
2667 } else if (so == nfs4_cb_so6) {
2668 ip = 6;
2669 } else {
2670 return;
2671 }
2672
2673 /* allocate/initialize a new nfs_callback_socket */
2674 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
2675 if (!ncbsp) {
2676 log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
2677 return;
2678 }
2679 bzero(ncbsp, sizeof(*ncbsp));
2680 ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
2681 nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);
2682
2683 /* accept a new socket */
2684 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
2685 ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
2686 nfs4_cb_rcv, ncbsp, &newso);
2687 if (error) {
2688 log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
2689 FREE(ncbsp, M_TEMP);
2690 return;
2691 }
2692
2693 /* set up the new socket */
2694 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2695 timeo.tv_usec = 0;
2696 timeo.tv_sec = 60;
2697 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2698 if (error) {
2699 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
2700 }
2701 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2702 if (error) {
2703 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
2704 }
2705 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2706 sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2707 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2708 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2709
2710 ncbsp->ncbs_so = newso;
2711 microuptime(&now);
2712 ncbsp->ncbs_stamp = now.tv_sec;
2713
2714 lck_mtx_lock(nfs_global_mutex);
2715
2716 /* add it to the list */
2717 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
2718
2719 /* verify it's from a host we have mounted */
2720 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2721 /* check if socket's source address matches this mount's server address */
2722 if (!nmp->nm_saddr) {
2723 continue;
2724 }
2725 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) {
2726 break;
2727 }
2728 }
2729 if (!nmp) { /* we don't want this socket, mark it dead */
2730 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2731 }
2732
2733 /* make sure the callback socket cleanup timer is running */
2734 /* (shorten the timer if we've got a socket we don't want) */
2735 if (!nfs4_callback_timer_on) {
2736 nfs4_callback_timer_on = 1;
2737 nfs_interval_timer_start(nfs4_callback_timer_call,
2738 !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
2739 } else if (!nmp && (nfs4_callback_timer_on < 2)) {
2740 nfs4_callback_timer_on = 2;
2741 thread_call_cancel(nfs4_callback_timer_call);
2742 nfs_interval_timer_start(nfs4_callback_timer_call, 500);
2743 }
2744
2745 lck_mtx_unlock(nfs_global_mutex);
2746 }
2747
2748 /*
2749 * Receive mbufs from callback sockets into RPC records and process each record.
2750 * Detect connection has been closed and shut down.
2751 */
2752 void
2753 nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
2754 {
2755 struct nfs_callback_socket *ncbsp = arg;
2756 struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
2757 struct timeval now;
2758 mbuf_t m;
2759 int error = 0, recv = 1;
2760
2761 lck_mtx_lock(nfs_global_mutex);
2762 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
2763 /* wait if upcall is already in progress */
2764 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
2765 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
2766 }
2767 ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
2768 lck_mtx_unlock(nfs_global_mutex);
2769
2770 /* loop while we make error-free progress */
2771 while (!error && recv) {
2772 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
2773 if (m) { /* handle the request */
2774 error = nfs4_cb_handler(ncbsp, m);
2775 }
2776 }
2777
2778 /* note: no error and no data indicates server closed its end */
2779 if ((error != EWOULDBLOCK) && (error || !recv)) {
2780 /*
2781 * Socket is either being closed or should be.
2782 * We can't close the socket in the context of the upcall.
2783 * So we mark it as dead and leave it for the cleanup timer to reap.
2784 */
2785 ncbsp->ncbs_stamp = 0;
2786 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2787 } else {
2788 microuptime(&now);
2789 ncbsp->ncbs_stamp = now.tv_sec;
2790 }
2791
2792 lck_mtx_lock(nfs_global_mutex);
2793 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
2794 lck_mtx_unlock(nfs_global_mutex);
2795 wakeup(ncbsp);
2796 }
2797
2798 /*
2799 * Handle an NFS callback channel request.
2800 */
2801 int
2802 nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
2803 {
2804 socket_t so = ncbsp->ncbs_so;
2805 struct nfsm_chain nmreq, nmrep;
2806 mbuf_t mhead = NULL, mrest = NULL, m;
2807 struct msghdr msg;
2808 struct nfsmount *nmp;
2809 fhandle_t fh;
2810 nfsnode_t np;
2811 nfs_stateid stateid;
2812 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
2813 uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
2814 uint32_t auth_type, auth_len;
2815 uint32_t numres, *pnumres;
2816 int error = 0, replen, len;
2817 size_t sentlen = 0;
2818
2819 xid = numops = op = status = procnum = taglen = cbid = 0;
2820
2821 nfsm_chain_dissect_init(error, &nmreq, mreq);
2822 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID
2823 nfsm_chain_get_32(error, &nmreq, val); // RPC Call
2824 nfsm_assert(error, (val == RPC_CALL), EBADRPC);
2825 nfsm_chain_get_32(error, &nmreq, val); // RPC Version
2826 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
2827 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number
2828 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
2829 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number
2830 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
2831 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number
2832 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
2833
2834 /* Handle authentication */
2835 /* XXX just ignore auth for now - handling kerberos may be tricky */
2836 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor
2837 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length
2838 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2839 if (!error && (auth_len > 0)) {
2840 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2841 }
2842 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
2843 nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length
2844 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2845 if (!error && (auth_len > 0)) {
2846 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2847 }
2848 if (error) {
2849 status = error;
2850 error = 0;
2851 goto nfsmout;
2852 }
2853
2854 switch (procnum) {
2855 case NFSPROC4_CB_NULL:
2856 status = NFSERR_RETVOID;
2857 break;
2858 case NFSPROC4_CB_COMPOUND:
2859 /* tag, minorversion, cb ident, numops, op array */
2860 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */
2861 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC);
2862
2863 /* start building the body of the response */
2864 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5 * NFSX_UNSIGNED);
2865 nfsm_chain_init(&nmrep, mrest);
2866
2867 /* copy tag from request to response */
2868 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */
2869 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
2870 nfsm_chain_get_32(error, &nmreq, val);
2871 nfsm_chain_add_32(error, &nmrep, val);
2872 }
2873
2874 /* insert number of results placeholder */
2875 numres = 0;
2876 nfsm_chain_add_32(error, &nmrep, numres);
2877 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
2878
2879 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */
2880 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
2881 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */
2882 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */
2883 if (error) {
2884 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH)) {
2885 status = error;
2886 } else if ((error == ENOBUFS) || (error == ENOMEM)) {
2887 status = NFSERR_RESOURCE;
2888 } else {
2889 status = NFSERR_SERVERFAULT;
2890 }
2891 error = 0;
2892 nfsm_chain_null(&nmrep);
2893 goto nfsmout;
2894 }
2895 /* match the callback ID to a registered mount */
2896 lck_mtx_lock(nfs_global_mutex);
2897 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2898 if (nmp->nm_cbid != cbid) {
2899 continue;
2900 }
2901 /* verify socket's source address matches this mount's server address */
2902 if (!nmp->nm_saddr) {
2903 continue;
2904 }
2905 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) {
2906 break;
2907 }
2908 }
2909 /* mark the NFS mount as busy */
2910 if (nmp) {
2911 nmp->nm_cbrefs++;
2912 }
2913 lck_mtx_unlock(nfs_global_mutex);
2914 if (!nmp) {
2915 /* if no mount match, just drop socket. */
2916 error = EPERM;
2917 nfsm_chain_null(&nmrep);
2918 goto out;
2919 }
2920
2921 /* process ops, adding results to mrest */
2922 while (numops > 0) {
2923 numops--;
2924 nfsm_chain_get_32(error, &nmreq, op);
2925 if (error) {
2926 break;
2927 }
2928 switch (op) {
2929 case NFS_OP_CB_GETATTR:
2930 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
2931 np = NULL;
2932 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2933 bmlen = NFS_ATTR_BITMAP_LEN;
2934 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
2935 if (error) {
2936 status = error;
2937 error = 0;
2938 numops = 0; /* don't process any more ops */
2939 } else {
2940 /* find the node for the file handle */
2941 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2942 if (error || !np) {
2943 status = NFSERR_BADHANDLE;
2944 error = 0;
2945 np = NULL;
2946 numops = 0; /* don't process any more ops */
2947 }
2948 }
2949 nfsm_chain_add_32(error, &nmrep, op);
2950 nfsm_chain_add_32(error, &nmrep, status);
2951 if (!error && (status == EBADRPC)) {
2952 error = status;
2953 }
2954 if (np) {
2955 /* only allow returning size, change, and mtime attrs */
2956 NFS_CLEAR_ATTRIBUTES(&rbitmap);
2957 attrbytes = 0;
2958 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
2959 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
2960 attrbytes += 2 * NFSX_UNSIGNED;
2961 }
2962 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
2963 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
2964 attrbytes += 2 * NFSX_UNSIGNED;
2965 }
2966 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2967 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
2968 attrbytes += 3 * NFSX_UNSIGNED;
2969 }
2970 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
2971 nfsm_chain_add_32(error, &nmrep, attrbytes);
2972 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
2973 nfsm_chain_add_64(error, &nmrep,
2974 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
2975 }
2976 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
2977 nfsm_chain_add_64(error, &nmrep, np->n_size);
2978 }
2979 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2980 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
2981 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
2982 }
2983 nfs_node_unlock(np);
2984 vnode_put(NFSTOV(np));
2985 np = NULL;
2986 }
2987 /*
2988 * If we hit an error building the reply, we can't easily back up.
2989 * So we'll just update the status and hope the server ignores the
2990 * extra garbage.
2991 */
2992 break;
2993 case NFS_OP_CB_RECALL:
2994 // (STATEID, TRUNCATE, FH) -> (STATUS)
2995 np = NULL;
2996 nfsm_chain_get_stateid(error, &nmreq, &stateid);
2997 nfsm_chain_get_32(error, &nmreq, truncate);
2998 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2999 if (error) {
3000 status = error;
3001 error = 0;
3002 numops = 0; /* don't process any more ops */
3003 } else {
3004 /* find the node for the file handle */
3005 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
3006 if (error || !np) {
3007 status = NFSERR_BADHANDLE;
3008 error = 0;
3009 np = NULL;
3010 numops = 0; /* don't process any more ops */
3011 } else if (!(np->n_openflags & N_DELEG_MASK) ||
3012 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
3013 /* delegation stateid state doesn't match */
3014 status = NFSERR_BAD_STATEID;
3015 numops = 0; /* don't process any more ops */
3016 }
3017 if (!status) { /* add node to recall queue, and wake socket thread */
3018 nfs4_delegation_return_enqueue(np);
3019 }
3020 if (np) {
3021 nfs_node_unlock(np);
3022 vnode_put(NFSTOV(np));
3023 }
3024 }
3025 nfsm_chain_add_32(error, &nmrep, op);
3026 nfsm_chain_add_32(error, &nmrep, status);
3027 if (!error && (status == EBADRPC)) {
3028 error = status;
3029 }
3030 break;
3031 case NFS_OP_CB_ILLEGAL:
3032 default:
3033 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
3034 status = NFSERR_OP_ILLEGAL;
3035 nfsm_chain_add_32(error, &nmrep, status);
3036 numops = 0; /* don't process any more ops */
3037 break;
3038 }
3039 numres++;
3040 }
3041
3042 if (!status && error) {
3043 if (error == EBADRPC) {
3044 status = error;
3045 } else if ((error == ENOBUFS) || (error == ENOMEM)) {
3046 status = NFSERR_RESOURCE;
3047 } else {
3048 status = NFSERR_SERVERFAULT;
3049 }
3050 error = 0;
3051 }
3052
3053 /* Now, set the numres field */
3054 *pnumres = txdr_unsigned(numres);
3055 nfsm_chain_build_done(error, &nmrep);
3056 nfsm_chain_null(&nmrep);
3057
3058 /* drop the callback reference on the mount */
3059 lck_mtx_lock(nfs_global_mutex);
3060 nmp->nm_cbrefs--;
3061 if (!nmp->nm_cbid) {
3062 wakeup(&nmp->nm_cbrefs);
3063 }
3064 lck_mtx_unlock(nfs_global_mutex);
3065 break;
3066 }
3067
3068 nfsmout:
3069 if (status == EBADRPC) {
3070 OSAddAtomic64(1, &nfsstats.rpcinvalid);
3071 }
3072
3073 /* build reply header */
3074 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
3075 nfsm_chain_init(&nmrep, mhead);
3076 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
3077 nfsm_chain_add_32(error, &nmrep, xid);
3078 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
3079 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
3080 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
3081 if (status & NFSERR_AUTHERR) {
3082 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
3083 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
3084 } else {
3085 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
3086 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
3087 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
3088 }
3089 } else {
3090 /* reply status */
3091 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
3092 /* XXX RPCAUTH_NULL verifier */
3093 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
3094 nfsm_chain_add_32(error, &nmrep, 0);
3095 /* accepted status */
3096 switch (status) {
3097 case EPROGUNAVAIL:
3098 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
3099 break;
3100 case EPROGMISMATCH:
3101 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
3102 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
3103 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
3104 break;
3105 case EPROCUNAVAIL:
3106 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
3107 break;
3108 case EBADRPC:
3109 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
3110 break;
3111 default:
3112 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
3113 if (status != NFSERR_RETVOID) {
3114 nfsm_chain_add_32(error, &nmrep, status);
3115 }
3116 break;
3117 }
3118 }
3119 nfsm_chain_build_done(error, &nmrep);
3120 if (error) {
3121 nfsm_chain_null(&nmrep);
3122 goto out;
3123 }
3124 error = mbuf_setnext(nmrep.nmc_mcur, mrest);
3125 if (error) {
3126 printf("nfs cb: mbuf_setnext failed %d\n", error);
3127 goto out;
3128 }
3129 mrest = NULL;
3130 /* Calculate the size of the reply */
3131 replen = 0;
3132 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m)) {
3133 replen += mbuf_len(m);
3134 }
3135 mbuf_pkthdr_setlen(mhead, replen);
3136 error = mbuf_pkthdr_setrcvif(mhead, NULL);
3137 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
3138 nfsm_chain_null(&nmrep);
3139
3140 /* send the reply */
3141 bzero(&msg, sizeof(msg));
3142 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
3143 mhead = NULL;
3144 if (!error && ((int)sentlen != replen)) {
3145 error = EWOULDBLOCK;
3146 }
3147 if (error == EWOULDBLOCK) { /* inability to send response is considered fatal */
3148 error = ETIMEDOUT;
3149 }
3150 out:
3151 if (error) {
3152 nfsm_chain_cleanup(&nmrep);
3153 }
3154 if (mhead) {
3155 mbuf_freem(mhead);
3156 }
3157 if (mrest) {
3158 mbuf_freem(mrest);
3159 }
3160 if (mreq) {
3161 mbuf_freem(mreq);
3162 }
3163 return error;
3164 }
3165 #endif /* CONFIG_NFS4 */
3166
3167 /*
3168 * Initialize an nfs_rpc_record_state structure.
3169 */
3170 void
3171 nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
3172 {
3173 bzero(nrrsp, sizeof(*nrrsp));
3174 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
3175 }
3176
3177 /*
3178 * Clean up an nfs_rpc_record_state structure.
3179 */
3180 void
3181 nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
3182 {
3183 if (nrrsp->nrrs_m) {
3184 mbuf_freem(nrrsp->nrrs_m);
3185 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
3186 }
3187 }
3188
3189 /*
3190 * Read the next (marked) RPC record from the socket.
3191 *
3192 * *recvp returns if any data was received.
3193 * *mp returns the next complete RPC record
3194 */
3195 int
3196 nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
3197 {
3198 struct iovec aio;
3199 struct msghdr msg;
3200 size_t rcvlen;
3201 int error = 0;
3202 mbuf_t m;
3203
3204 *recvp = 0;
3205 *mp = NULL;
3206
3207 /* read the TCP RPC record marker */
3208 while (!error && nrrsp->nrrs_markerleft) {
3209 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
3210 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
3211 aio.iov_len = nrrsp->nrrs_markerleft;
3212 bzero(&msg, sizeof(msg));
3213 msg.msg_iov = &aio;
3214 msg.msg_iovlen = 1;
3215 error = sock_receive(so, &msg, flags, &rcvlen);
3216 if (error || !rcvlen) {
3217 break;
3218 }
3219 *recvp = 1;
3220 nrrsp->nrrs_markerleft -= rcvlen;
3221 if (nrrsp->nrrs_markerleft) {
3222 continue;
3223 }
3224 /* record marker complete */
3225 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
3226 if (nrrsp->nrrs_fragleft & 0x80000000) {
3227 nrrsp->nrrs_lastfrag = 1;
3228 nrrsp->nrrs_fragleft &= ~0x80000000;
3229 }
3230 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
3231 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
3232 /* This is SERIOUS! We are out of sync with the sender. */
3233 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
3234 error = EFBIG;
3235 }
3236 }
3237
3238 /* read the TCP RPC record fragment */
3239 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
3240 m = NULL;
3241 rcvlen = nrrsp->nrrs_fragleft;
3242 error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
3243 if (error || !rcvlen || !m) {
3244 break;
3245 }
3246 *recvp = 1;
3247 /* append mbufs to list */
3248 nrrsp->nrrs_fragleft -= rcvlen;
3249 if (!nrrsp->nrrs_m) {
3250 nrrsp->nrrs_m = m;
3251 } else {
3252 error = mbuf_setnext(nrrsp->nrrs_mlast, m);
3253 if (error) {
3254 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
3255 mbuf_freem(m);
3256 break;
3257 }
3258 }
3259 while (mbuf_next(m)) {
3260 m = mbuf_next(m);
3261 }
3262 nrrsp->nrrs_mlast = m;
3263 }
3264
3265 /* done reading fragment? */
3266 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
3267 /* reset socket fragment parsing state */
3268 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
3269 if (nrrsp->nrrs_lastfrag) {
3270 /* RPC record complete */
3271 *mp = nrrsp->nrrs_m;
3272 /* reset socket record parsing state */
3273 nrrsp->nrrs_reclen = 0;
3274 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
3275 nrrsp->nrrs_lastfrag = 0;
3276 }
3277 }
3278
3279 return error;
3280 }
3281
3282
3283
3284 /*
3285 * The NFS client send routine.
3286 *
3287 * Send the given NFS request out the mount's socket.
3288 * Holds nfs_sndlock() for the duration of this call.
3289 *
3290 * - check for request termination (sigintr)
3291 * - wait for reconnect, if necessary
3292 * - UDP: check the congestion window
3293 * - make a copy of the request to send
3294 * - UDP: update the congestion window
3295 * - send the request
3296 *
3297 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
3298 * rexmit count is also updated if this isn't the first send.
3299 *
3300 * If the send is not successful, make sure R_MUSTRESEND is set.
3301 * If this wasn't the first transmit, set R_RESENDERR.
3302 * Also, undo any UDP congestion window changes made.
3303 *
3304 * If the error appears to indicate that the socket should
3305 * be reconnected, mark the socket for reconnection.
3306 *
3307 * Only return errors when the request should be aborted.
3308 */
3309 int
3310 nfs_send(struct nfsreq *req, int wait)
3311 {
3312 struct nfsmount *nmp;
3313 struct nfs_socket *nso;
3314 int error, error2, sotype, rexmit, slpflag = 0, needrecon;
3315 struct msghdr msg;
3316 struct sockaddr *sendnam;
3317 mbuf_t mreqcopy;
3318 size_t sentlen = 0;
3319 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
3320
3321 again:
3322 error = nfs_sndlock(req);
3323 if (error) {
3324 lck_mtx_lock(&req->r_mtx);
3325 req->r_error = error;
3326 req->r_flags &= ~R_SENDING;
3327 lck_mtx_unlock(&req->r_mtx);
3328 return error;
3329 }
3330
3331 error = nfs_sigintr(req->r_nmp, req, NULL, 0);
3332 if (error) {
3333 nfs_sndunlock(req);
3334 lck_mtx_lock(&req->r_mtx);
3335 req->r_error = error;
3336 req->r_flags &= ~R_SENDING;
3337 lck_mtx_unlock(&req->r_mtx);
3338 return error;
3339 }
3340 nmp = req->r_nmp;
3341 sotype = nmp->nm_sotype;
3342
3343 /*
3344 * If it's a setup RPC but we're not in SETUP... must need reconnect.
3345 * If it's a recovery RPC but the socket's not ready... must need reconnect.
3346 */
3347 if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
3348 ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
3349 error = ETIMEDOUT;
3350 nfs_sndunlock(req);
3351 lck_mtx_lock(&req->r_mtx);
3352 req->r_error = error;
3353 req->r_flags &= ~R_SENDING;
3354 lck_mtx_unlock(&req->r_mtx);
3355 return error;
3356 }
3357
3358 /* If the socket needs reconnection, do that now. */
3359 /* wait until socket is ready - unless this request is part of setup */
3360 lck_mtx_lock(&nmp->nm_lock);
3361 if (!(nmp->nm_sockflags & NMSOCK_READY) &&
3362 !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
3363 if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) {
3364 slpflag |= PCATCH;
3365 }
3366 lck_mtx_unlock(&nmp->nm_lock);
3367 nfs_sndunlock(req);
3368 if (!wait) {
3369 lck_mtx_lock(&req->r_mtx);
3370 req->r_flags &= ~R_SENDING;
3371 req->r_flags |= R_MUSTRESEND;
3372 req->r_rtt = 0;
3373 lck_mtx_unlock(&req->r_mtx);
3374 return 0;
3375 }
3376 NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n", req->r_xid);
3377 lck_mtx_lock(&req->r_mtx);
3378 req->r_flags &= ~R_MUSTRESEND;
3379 req->r_rtt = 0;
3380 lck_mtx_unlock(&req->r_mtx);
3381 lck_mtx_lock(&nmp->nm_lock);
3382 while (!(nmp->nm_sockflags & NMSOCK_READY)) {
3383 /* don't bother waiting if the socket thread won't be reconnecting it */
3384 if (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) {
3385 error = EIO;
3386 break;
3387 }
3388 if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (nmp->nm_reconnect_start > 0)) {
3389 struct timeval now;
3390 microuptime(&now);
3391 if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
3392 /* soft mount in reconnect for a while... terminate ASAP */
3393 OSAddAtomic64(1, &nfsstats.rpctimeouts);
3394 req->r_flags |= R_SOFTTERM;
3395 req->r_error = error = ETIMEDOUT;
3396 break;
3397 }
3398 }
3399 /* make sure socket thread is running, then wait */
3400 nfs_mount_sock_thread_wake(nmp);
3401 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1))) {
3402 break;
3403 }
3404 msleep(req, &nmp->nm_lock, slpflag | PSOCK, "nfsconnectwait", &ts);
3405 slpflag = 0;
3406 }
3407 lck_mtx_unlock(&nmp->nm_lock);
3408 if (error) {
3409 lck_mtx_lock(&req->r_mtx);
3410 req->r_error = error;
3411 req->r_flags &= ~R_SENDING;
3412 lck_mtx_unlock(&req->r_mtx);
3413 return error;
3414 }
3415 goto again;
3416 }
3417 nso = nmp->nm_nso;
3418 /* note that we're using the mount's socket to do the send */
3419 nmp->nm_state |= NFSSTA_SENDING; /* will be cleared by nfs_sndunlock() */
3420 lck_mtx_unlock(&nmp->nm_lock);
3421 if (!nso) {
3422 nfs_sndunlock(req);
3423 lck_mtx_lock(&req->r_mtx);
3424 req->r_flags &= ~R_SENDING;
3425 req->r_flags |= R_MUSTRESEND;
3426 req->r_rtt = 0;
3427 lck_mtx_unlock(&req->r_mtx);
3428 return 0;
3429 }
3430
3431 lck_mtx_lock(&req->r_mtx);
3432 rexmit = (req->r_flags & R_SENT);
3433
3434 if (sotype == SOCK_DGRAM) {
3435 lck_mtx_lock(&nmp->nm_lock);
3436 if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
3437 /* if we can't send this out yet, wait on the cwnd queue */
3438 slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
3439 lck_mtx_unlock(&nmp->nm_lock);
3440 nfs_sndunlock(req);
3441 req->r_flags &= ~R_SENDING;
3442 req->r_flags |= R_MUSTRESEND;
3443 lck_mtx_unlock(&req->r_mtx);
3444 if (!wait) {
3445 req->r_rtt = 0;
3446 return 0;
3447 }
3448 lck_mtx_lock(&nmp->nm_lock);
3449 while (nmp->nm_sent >= nmp->nm_cwnd) {
3450 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1))) {
3451 break;
3452 }
3453 TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
3454 msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
3455 slpflag = 0;
3456 if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
3457 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
3458 req->r_cchain.tqe_next = NFSREQNOLIST;
3459 }
3460 }
3461 lck_mtx_unlock(&nmp->nm_lock);
3462 goto again;
3463 }
3464 /*
3465 * We update these *before* the send to avoid racing
3466 * against others who may be looking to send requests.
3467 */
3468 if (!rexmit) {
3469 /* first transmit */
3470 req->r_flags |= R_CWND;
3471 nmp->nm_sent += NFS_CWNDSCALE;
3472 } else {
3473 /*
3474 * When retransmitting, turn timing off
3475 * and divide congestion window by 2.
3476 */
3477 req->r_flags &= ~R_TIMING;
3478 nmp->nm_cwnd >>= 1;
3479 if (nmp->nm_cwnd < NFS_CWNDSCALE) {
3480 nmp->nm_cwnd = NFS_CWNDSCALE;
3481 }
3482 }
3483 lck_mtx_unlock(&nmp->nm_lock);
3484 }
3485
3486 req->r_flags &= ~R_MUSTRESEND;
3487 lck_mtx_unlock(&req->r_mtx);
3488
3489 error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
3490 wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
3491 if (error) {
3492 if (wait) {
3493 log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
3494 }
3495 nfs_sndunlock(req);
3496 lck_mtx_lock(&req->r_mtx);
3497 req->r_flags &= ~R_SENDING;
3498 req->r_flags |= R_MUSTRESEND;
3499 req->r_rtt = 0;
3500 lck_mtx_unlock(&req->r_mtx);
3501 return 0;
3502 }
3503
3504 bzero(&msg, sizeof(msg));
3505 if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
3506 msg.msg_name = (caddr_t)sendnam;
3507 msg.msg_namelen = sendnam->sa_len;
3508 }
3509 NFS_SOCK_DUMP_MBUF("Sending mbuf\n", mreqcopy);
3510 error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
3511 if (error || (sentlen != req->r_mreqlen)) {
3512 NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n",
3513 req->r_xid, (int)sentlen, (int)req->r_mreqlen, error);
3514 }
3515
3516 if (!error && (sentlen != req->r_mreqlen)) {
3517 error = EWOULDBLOCK;
3518 }
3519 needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));
3520
3521 lck_mtx_lock(&req->r_mtx);
3522 req->r_flags &= ~R_SENDING;
3523 req->r_rtt = 0;
3524 if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT)) {
3525 req->r_rexmit = NFS_MAXREXMIT;
3526 }
3527
3528 if (!error) {
3529 /* SUCCESS */
3530 req->r_flags &= ~R_RESENDERR;
3531 if (rexmit) {
3532 OSAddAtomic64(1, &nfsstats.rpcretries);
3533 }
3534 req->r_flags |= R_SENT;
3535 if (req->r_flags & R_WAITSENT) {
3536 req->r_flags &= ~R_WAITSENT;
3537 wakeup(req);
3538 }
3539 nfs_sndunlock(req);
3540 lck_mtx_unlock(&req->r_mtx);
3541 return 0;
3542 }
3543
3544 /* send failed */
3545 req->r_flags |= R_MUSTRESEND;
3546 if (rexmit) {
3547 req->r_flags |= R_RESENDERR;
3548 }
3549 if ((error == EINTR) || (error == ERESTART)) {
3550 req->r_error = error;
3551 }
3552 lck_mtx_unlock(&req->r_mtx);
3553
3554 if (sotype == SOCK_DGRAM) {
3555 /*
3556 * Note: even though a first send may fail, we consider
3557 * the request sent for congestion window purposes.
3558 * So we don't need to undo any of the changes made above.
3559 */
3560 /*
3561 * Socket errors ignored for connectionless sockets??
3562 * For now, ignore them all
3563 */
3564 if ((error != EINTR) && (error != ERESTART) &&
3565 (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
3566 int clearerror = 0, optlen = sizeof(clearerror);
3567 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
3568 #ifdef NFS_SOCKET_DEBUGGING
3569 if (clearerror) {
3570 NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n",
3571 error, clearerror);
3572 }
3573 #endif
3574 }
3575 }
3576
3577 /* check if it appears we should reconnect the socket */
3578 switch (error) {
3579 case EWOULDBLOCK:
3580 /* if send timed out, reconnect if on TCP */
3581 if (sotype != SOCK_STREAM) {
3582 break;
3583 }
3584 case EPIPE:
3585 case EADDRNOTAVAIL:
3586 case ENETDOWN:
3587 case ENETUNREACH:
3588 case ENETRESET:
3589 case ECONNABORTED:
3590 case ECONNRESET:
3591 case ENOTCONN:
3592 case ESHUTDOWN:
3593 case ECONNREFUSED:
3594 case EHOSTDOWN:
3595 case EHOSTUNREACH:
3596 /* case ECANCELED??? */
3597 needrecon = 1;
3598 break;
3599 }
3600 if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
3601 NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error);
3602 nfs_need_reconnect(nmp);
3603 }
3604
3605 nfs_sndunlock(req);
3606
3607 if (nfs_is_dead(error, nmp)) {
3608 error = EIO;
3609 }
3610
3611 /*
3612 * Don't log some errors:
3613 * EPIPE errors may be common with servers that drop idle connections.
3614 * EADDRNOTAVAIL may occur on network transitions.
3615 * ENOTCONN may occur under some network conditions.
3616 */
3617 if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN)) {
3618 error = 0;
3619 }
3620 if (error && (error != EINTR) && (error != ERESTART)) {
3621 log(LOG_INFO, "nfs send error %d for server %s\n", error,
3622 !req->r_nmp ? "<unmounted>" :
3623 vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
3624 }
3625
3626 /* prefer request termination error over other errors */
3627 error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
3628 if (error2) {
3629 error = error2;
3630 }
3631
3632 /* only allow the following errors to be returned */
3633 if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
3634 (error != ENXIO) && (error != ETIMEDOUT)) {
3635 /*
3636 * We got some error we don't know what do do with,
3637 * i.e., we're not reconnecting, we map it to
3638 * EIO. Presumably our send failed and we better tell
3639 * the caller so they don't wait for a reply that is
3640 * never going to come. If we are reconnecting we
3641 * return 0 and the request will be resent.
3642 */
3643 error = needrecon ? 0 : EIO;
3644 }
3645 return error;
3646 }
3647
3648 /*
3649 * NFS client socket upcalls
3650 *
3651 * Pull RPC replies out of an NFS mount's socket and match them
3652 * up with the pending request.
3653 *
3654 * The datagram code is simple because we always get whole
3655 * messages out of the socket.
3656 *
3657 * The stream code is more involved because we have to parse
3658 * the RPC records out of the stream.
3659 */
3660
3661 /* NFS client UDP socket upcall */
3662 void
3663 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
3664 {
3665 struct nfsmount *nmp = arg;
3666 struct nfs_socket *nso = nmp->nm_nso;
3667 size_t rcvlen;
3668 mbuf_t m;
3669 int error = 0;
3670
3671 if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
3672 return;
3673 }
3674
3675 do {
3676 /* make sure we're on the current socket */
3677 if (!nso || (nso->nso_so != so)) {
3678 return;
3679 }
3680
3681 m = NULL;
3682 rcvlen = 1000000;
3683 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
3684 if (m) {
3685 nfs_request_match_reply(nmp, m);
3686 }
3687 } while (m && !error);
3688
3689 if (error && (error != EWOULDBLOCK)) {
3690 /* problems with the socket... mark for reconnection */
3691 NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n", error);
3692 nfs_need_reconnect(nmp);
3693 }
3694 }
3695
3696 /* NFS client TCP socket upcall */
3697 void
3698 nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
3699 {
3700 struct nfsmount *nmp = arg;
3701 struct nfs_socket *nso = nmp->nm_nso;
3702 struct nfs_rpc_record_state nrrs;
3703 mbuf_t m;
3704 int error = 0;
3705 int recv = 1;
3706 int wup = 0;
3707
3708 if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
3709 return;
3710 }
3711
3712 /* make sure we're on the current socket */
3713 lck_mtx_lock(&nmp->nm_lock);
3714 nso = nmp->nm_nso;
3715 if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
3716 lck_mtx_unlock(&nmp->nm_lock);
3717 return;
3718 }
3719 lck_mtx_unlock(&nmp->nm_lock);
3720
3721 /* make sure this upcall should be trying to do work */
3722 lck_mtx_lock(&nso->nso_lock);
3723 if (nso->nso_flags & (NSO_UPCALL | NSO_DISCONNECTING | NSO_DEAD)) {
3724 lck_mtx_unlock(&nso->nso_lock);
3725 return;
3726 }
3727 nso->nso_flags |= NSO_UPCALL;
3728 nrrs = nso->nso_rrs;
3729 lck_mtx_unlock(&nso->nso_lock);
3730
3731 /* loop while we make error-free progress */
3732 while (!error && recv) {
3733 error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
3734 if (m) { /* match completed response with request */
3735 nfs_request_match_reply(nmp, m);
3736 }
3737 }
3738
3739 /* Update the sockets's rpc parsing state */
3740 lck_mtx_lock(&nso->nso_lock);
3741 nso->nso_rrs = nrrs;
3742 if (nso->nso_flags & NSO_DISCONNECTING) {
3743 wup = 1;
3744 }
3745 nso->nso_flags &= ~NSO_UPCALL;
3746 lck_mtx_unlock(&nso->nso_lock);
3747 if (wup) {
3748 wakeup(&nso->nso_flags);
3749 }
3750
3751 #ifdef NFS_SOCKET_DEBUGGING
3752 if (!recv && (error != EWOULDBLOCK)) {
3753 NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error);
3754 }
3755 #endif
3756 /* note: no error and no data indicates server closed its end */
3757 if ((error != EWOULDBLOCK) && (error || !recv)) {
3758 /* problems with the socket... mark for reconnection */
3759 NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n", error);
3760 nfs_need_reconnect(nmp);
3761 }
3762 }
3763
3764 /*
3765 * "poke" a socket to try to provoke any pending errors
3766 */
3767 void
3768 nfs_sock_poke(struct nfsmount *nmp)
3769 {
3770 struct iovec aio;
3771 struct msghdr msg;
3772 size_t len;
3773 int error = 0;
3774 int dummy;
3775
3776 lck_mtx_lock(&nmp->nm_lock);
3777 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
3778 !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
3779 /* Nothing to poke */
3780 nmp->nm_sockflags &= ~NMSOCK_POKE;
3781 wakeup(&nmp->nm_sockflags);
3782 lck_mtx_unlock(&nmp->nm_lock);
3783 return;
3784 }
3785 lck_mtx_unlock(&nmp->nm_lock);
3786 aio.iov_base = &dummy;
3787 aio.iov_len = 0;
3788 len = 0;
3789 bzero(&msg, sizeof(msg));
3790 msg.msg_iov = &aio;
3791 msg.msg_iovlen = 1;
3792 error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
3793 NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error);
3794 lck_mtx_lock(&nmp->nm_lock);
3795 nmp->nm_sockflags &= ~NMSOCK_POKE;
3796 wakeup(&nmp->nm_sockflags);
3797 lck_mtx_unlock(&nmp->nm_lock);
3798 nfs_is_dead(error, nmp);
3799 }
3800
3801 /*
3802 * Match an RPC reply with the corresponding request
 *
 * Reads the XID and message type from the front of mrep.  If parsing
 * fails or the message is not an RPC_REPLY, the rpcinvalid counter is
 * bumped and mrep is freed.  Otherwise nfs_reqq is searched (under
 * nfs_request_mutex) for a request that has no reply yet and whose
 * 32-bit XID equals the one received.  On a match the parsed chain is
 * stored in req->r_nmrep, the mount's congestion window (datagram
 * sockets only) and round-trip estimates (R_TIMING requests only) are
 * updated, nm_timeouts is reset, and sleepers on req are woken.  If
 * nothing matches, the rpcunexpected counter is bumped and mrep is
 * freed.
3803 */
3804 void
3805 nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
3806 {
3807 struct nfsreq *req;
3808 struct nfsm_chain nmrep;
3809 u_int32_t reply = 0, rxid = 0;
3810 int error = 0, asyncioq, t1;
3811
3812 /* Get the xid and check that it is an rpc reply */
3813 nfsm_chain_dissect_init(error, &nmrep, mrep);
3814 nfsm_chain_get_32(error, &nmrep, rxid);
3815 nfsm_chain_get_32(error, &nmrep, reply);
3816 if (error || (reply != RPC_REPLY)) {
3817 OSAddAtomic64(1, &nfsstats.rpcinvalid);
3818 mbuf_freem(mrep);
3819 return;
3820 }
3821
3822 /*
3823 * Loop through the request list to match up the reply
3824 * Iff no match, just drop it.
3825 */
3826 lck_mtx_lock(nfs_request_mutex);
3827 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
/* first pass without r_mtx: skip requests that already hold a reply or carry another XID */
3828 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
3829 continue;
3830 }
3831 /* looks like we have it, grab lock and double check */
3832 lck_mtx_lock(&req->r_mtx);
3833 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
3834 lck_mtx_unlock(&req->r_mtx);
3835 continue;
3836 }
3837 /* Found it.. */
3838 req->r_nmrep = nmrep;
/* nm_lock is held across the cwnd, rtt and nm_timeouts updates below */
3839 lck_mtx_lock(&nmp->nm_lock);
3840 if (nmp->nm_sotype == SOCK_DGRAM) {
3841 /*
3842 * Update congestion window.
3843 * Do the additive increase of one rpc/rtt.
3844 */
3845 FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3846 if (nmp->nm_cwnd <= nmp->nm_sent) {
3847 nmp->nm_cwnd +=
3848 ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
3849 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
3850 if (nmp->nm_cwnd > NFS_MAXCWND) {
3851 nmp->nm_cwnd = NFS_MAXCWND;
3852 }
3853 }
/* this request no longer counts against the window */
3854 if (req->r_flags & R_CWND) {
3855 nmp->nm_sent -= NFS_CWNDSCALE;
3856 req->r_flags &= ~R_CWND;
3857 }
3858 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3859 /* congestion window is open, poke the cwnd queue */
3860 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3861 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3862 req2->r_cchain.tqe_next = NFSREQNOLIST;
3863 wakeup(req2);
3864 }
3865 }
3866 /*
3867 * Update rtt using a gain of 0.125 on the mean
3868 * and a gain of 0.25 on the deviation.
3869 */
3870 if (req->r_flags & R_TIMING) {
3871 /*
3872 * Since the timer resolution of
3873 * NFS_HZ is so coarse, it can often
3874 * result in r_rtt == 0. Since
3875 * r_rtt == N means that the actual
3876 * rtt is between N+dt and N+2-dt ticks,
3877 * add 1.
3878 */
3879 if (proct[req->r_procnum] == 0) {
3880 panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
3881 }
/* t1 = (sample + 1) - srtt/8 is added to the mean; |t1| - sdrtt/4 is added to the deviation */
3882 t1 = req->r_rtt + 1;
3883 t1 -= (NFS_SRTT(req) >> 3);
3884 NFS_SRTT(req) += t1;
3885 if (t1 < 0) {
3886 t1 = -t1;
3887 }
3888 t1 -= (NFS_SDRTT(req) >> 2);
3889 NFS_SDRTT(req) += t1;
3890 }
3891 nmp->nm_timeouts = 0;
3892 lck_mtx_unlock(&nmp->nm_lock);
3893 /* signal anyone waiting on this request */
3894 wakeup(req);
/* sample the callback while r_mtx is still held; nfs_asyncio_finish() runs after both locks are dropped */
3895 asyncioq = (req->r_callback.rcb_func != NULL);
3896 #if CONFIG_NFS_GSS
3897 if (nfs_request_using_gss(req)) {
3898 nfs_gss_clnt_rpcdone(req);
3899 }
3900 #endif /* CONFIG_NFS_GSS */
3901 lck_mtx_unlock(&req->r_mtx);
3902 lck_mtx_unlock(nfs_request_mutex);
3903 /* if it's an async RPC with a callback, queue it up */
3904 if (asyncioq) {
3905 nfs_asyncio_finish(req);
3906 }
3907 break;
3908 }
3909
/* req is NULL only if the loop ran off the end of the list, so nfs_request_mutex is still held here */
3910 if (!req) {
3911 /* not matched to a request, so drop it. */
3912 lck_mtx_unlock(nfs_request_mutex);
3913 OSAddAtomic64(1, &nfsstats.rpcunexpected);
3914 mbuf_freem(mrep);
3915 }
3916 }
3917
3918 /*
3919 * Wait for the reply for a given request...
3920 * ...potentially resending the request if necessary.
3921 */
3922 int
3923 nfs_wait_reply(struct nfsreq *req)
3924 {
3925 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
3926 int error = 0, slpflag, first = 1;
3927
3928 if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) {
3929 slpflag = PCATCH;
3930 } else {
3931 slpflag = 0;
3932 }
3933
3934 lck_mtx_lock(&req->r_mtx);
3935 while (!req->r_nmrep.nmc_mhead) {
3936 if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0))) {
3937 break;
3938 }
3939 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) {
3940 break;
3941 }
3942 /* check if we need to resend */
3943 if (req->r_flags & R_MUSTRESEND) {
3944 NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
3945 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
3946 req->r_flags |= R_SENDING;
3947 lck_mtx_unlock(&req->r_mtx);
3948 if (nfs_request_using_gss(req)) {
3949 /*
3950 * It's an RPCSEC_GSS request.
3951 * Can't just resend the original request
3952 * without bumping the cred sequence number.
3953 * Go back and re-build the request.
3954 */
3955 lck_mtx_lock(&req->r_mtx);
3956 req->r_flags &= ~R_SENDING;
3957 lck_mtx_unlock(&req->r_mtx);
3958 return EAGAIN;
3959 }
3960 error = nfs_send(req, 1);
3961 lck_mtx_lock(&req->r_mtx);
3962 NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
3963 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error);
3964 if (error) {
3965 break;
3966 }
3967 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) {
3968 break;
3969 }
3970 }
3971 /* need to poll if we're P_NOREMOTEHANG */
3972 if (nfs_noremotehang(req->r_thread)) {
3973 ts.tv_sec = 1;
3974 }
3975 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
3976 first = slpflag = 0;
3977 }
3978 lck_mtx_unlock(&req->r_mtx);
3979
3980 return error;
3981 }
3982
3983 /*
3984 * An NFS request goes something like this:
3985 * (nb: always frees up mreq mbuf list)
3986 * nfs_request_create()
3987 * - allocates a request struct if one is not provided
3988 * - initial fill-in of the request struct
3989 * nfs_request_add_header()
3990 * - add the RPC header
3991 * nfs_request_send()
3992 * - link it into list
3993 * - call nfs_send() for first transmit
3994 * nfs_request_wait()
3995 * - call nfs_wait_reply() to wait for the reply
3996 * nfs_request_finish()
3997 * - break down rpc header and return with error or nfs reply
3998 * pointed to by nmrep.
3999 * nfs_request_rele()
4000 * nfs_request_destroy()
4001 * - clean up the request struct
4002 * - free the request struct if it was allocated by nfs_request_create()
4003 */
4004
4005 /*
4006 * Set up an NFS request struct (allocating if no request passed in).
4007 */
4008 int
4009 nfs_request_create(
4010 nfsnode_t np,
4011 mount_t mp, /* used only if !np */
4012 struct nfsm_chain *nmrest,
4013 int procnum,
4014 thread_t thd,
4015 kauth_cred_t cred,
4016 struct nfsreq **reqp)
4017 {
4018 struct nfsreq *req, *newreq = NULL;
4019 struct nfsmount *nmp;
4020
4021 req = *reqp;
4022 if (!req) {
4023 /* allocate a new NFS request structure */
4024 MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
4025 if (!newreq) {
4026 mbuf_freem(nmrest->nmc_mhead);
4027 nmrest->nmc_mhead = NULL;
4028 return ENOMEM;
4029 }
4030 req = newreq;
4031 }
4032
4033 bzero(req, sizeof(*req));
4034 if (req == newreq) {
4035 req->r_flags = R_ALLOCATED;
4036 }
4037
4038 nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
4039 if (nfs_mount_gone(nmp)) {
4040 if (newreq) {
4041 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
4042 }
4043 return ENXIO;
4044 }
4045 lck_mtx_lock(&nmp->nm_lock);
4046 if ((nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) &&
4047 (nmp->nm_state & NFSSTA_TIMEO)) {
4048 lck_mtx_unlock(&nmp->nm_lock);
4049 mbuf_freem(nmrest->nmc_mhead);
4050 nmrest->nmc_mhead = NULL;
4051 if (newreq) {
4052 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
4053 }
4054 return ENXIO;
4055 }
4056
4057 if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS)) {
4058 OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
4059 }
4060 if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL)) {
4061 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
4062 }
4063
4064 lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
4065 req->r_nmp = nmp;
4066 nmp->nm_ref++;
4067 req->r_np = np;
4068 req->r_thread = thd;
4069 if (!thd) {
4070 req->r_flags |= R_NOINTR;
4071 }
4072 if (IS_VALID_CRED(cred)) {
4073 kauth_cred_ref(cred);
4074 req->r_cred = cred;
4075 }
4076 req->r_procnum = procnum;
4077 if (proct[procnum] > 0) {
4078 req->r_flags |= R_TIMING;
4079 }
4080 req->r_nmrep.nmc_mhead = NULL;
4081 SLIST_INIT(&req->r_gss_seqlist);
4082 req->r_achain.tqe_next = NFSREQNOLIST;
4083 req->r_rchain.tqe_next = NFSREQNOLIST;
4084 req->r_cchain.tqe_next = NFSREQNOLIST;
4085
4086 /* set auth flavor to use for request */
4087 if (!req->r_cred) {
4088 req->r_auth = RPCAUTH_NONE;
4089 } else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID)) {
4090 req->r_auth = req->r_np->n_auth;
4091 } else {
4092 req->r_auth = nmp->nm_auth;
4093 }
4094
4095 lck_mtx_unlock(&nmp->nm_lock);
4096
4097 /* move the request mbuf chain to the nfsreq */
4098 req->r_mrest = nmrest->nmc_mhead;
4099 nmrest->nmc_mhead = NULL;
4100
4101 req->r_flags |= R_INITTED;
4102 req->r_refs = 1;
4103 if (newreq) {
4104 *reqp = req;
4105 }
4106 return 0;
4107 }
4108
4109 /*
4110 * Clean up and free an NFS request structure.
 *
 * Does nothing unless req is non-NULL and has R_INITTED set.  Clears
 * R_INITTED, dequeues the request via nfs_reqdequeue() if RL_QUEUED,
 * removes it from the mount's async I/O, resend and congestion-window
 * queues, gives back its congestion-window slot and jukebox-message
 * accounting, frees its mbuf chains, credential reference, GSS state
 * and wrongsec array, calls nfs_mount_rele() on the mount, destroys
 * r_mtx and, when R_ALLOCATED is set, frees the structure itself.
4111 */
4112 void
4113 nfs_request_destroy(struct nfsreq *req)
4114 {
4115 struct nfsmount *nmp;
4116 int clearjbtimeo = 0;
4117
4118 #if CONFIG_NFS_GSS
4119 struct gss_seq *gsp, *ngsp;
4120 #endif
4121
4122 if (!req || !(req->r_flags & R_INITTED)) {
4123 return;
4124 }
4125 nmp = req->r_nmp;
4126 req->r_flags &= ~R_INITTED;
4127 if (req->r_lflags & RL_QUEUED) {
4128 nfs_reqdequeue(req);
4129 }
4130
4131 if (req->r_achain.tqe_next != NFSREQNOLIST) {
4132 /*
4133 * Still on an async I/O queue?
4134 * %%% But which one, we may be on a local iod.
4135 */
4136 lck_mtx_lock(nfsiod_mutex);
/* re-check under nfsiod_mutex; the test above was made without it */
4137 if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) {
4138 TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
4139 req->r_achain.tqe_next = NFSREQNOLIST;
4140 }
4141 lck_mtx_unlock(nfsiod_mutex);
4142 }
4143
/* lock order here: r_mtx first, then nm_lock */
4144 lck_mtx_lock(&req->r_mtx);
4145 if (nmp) {
4146 lck_mtx_lock(&nmp->nm_lock);
4147 if (req->r_flags & R_CWND) {
4148 /* Decrement the outstanding request count. */
4149 req->r_flags &= ~R_CWND;
4150 nmp->nm_sent -= NFS_CWNDSCALE;
4151 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
4152 /* congestion window is open, poke the cwnd queue */
4153 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
4154 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
4155 req2->r_cchain.tqe_next = NFSREQNOLIST;
4156 wakeup(req2);
4157 }
4158 }
4159 assert((req->r_flags & R_RESENDQ) == 0);
4160 /* XXX should we just remove this conditional, we should have a reference if we're resending */
4161 if (req->r_rchain.tqe_next != NFSREQNOLIST) {
4162 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
4163 req->r_rchain.tqe_next = NFSREQNOLIST;
4164 if (req->r_flags & R_RESENDQ) {
4165 req->r_flags &= ~R_RESENDQ;
4166 }
4167 }
4168 if (req->r_cchain.tqe_next != NFSREQNOLIST) {
4169 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
4170 req->r_cchain.tqe_next = NFSREQNOLIST;
4171 }
/* last jukebox-delayed request on this mount going away: remember to clear NFSSTA_JUKEBOXTIMEO */
4172 if (req->r_flags & R_JBTPRINTFMSG) {
4173 req->r_flags &= ~R_JBTPRINTFMSG;
4174 nmp->nm_jbreqs--;
4175 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4176 }
4177 lck_mtx_unlock(&nmp->nm_lock);
4178 }
4179 lck_mtx_unlock(&req->r_mtx);
4180
/* nfs_up() is called only after both locks have been dropped */
4181 if (clearjbtimeo) {
4182 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
4183 }
/* r_mrest is freed on its own only when no header chain exists; presumably r_mhead's chain already leads into r_mrest — confirm against nfsm_rpchead() */
4184 if (req->r_mhead) {
4185 mbuf_freem(req->r_mhead);
4186 } else if (req->r_mrest) {
4187 mbuf_freem(req->r_mrest);
4188 }
4189 if (req->r_nmrep.nmc_mhead) {
4190 mbuf_freem(req->r_nmrep.nmc_mhead);
4191 }
4192 if (IS_VALID_CRED(req->r_cred)) {
4193 kauth_cred_unref(&req->r_cred);
4194 }
4195 #if CONFIG_NFS_GSS
4196 if (nfs_request_using_gss(req)) {
4197 nfs_gss_clnt_rpcdone(req);
4198 }
/* free every entry left on the request's GSS sequence list */
4199 SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
4200 FREE(gsp, M_TEMP);
4201 if (req->r_gss_ctx) {
4202 nfs_gss_clnt_ctx_unref(req);
4203 }
4204 #endif /* CONFIG_NFS_GSS */
4205 if (req->r_wrongsec) {
4206 FREE(req->r_wrongsec, M_TEMP);
4207 }
4208 if (nmp) {
4209 nfs_mount_rele(nmp);
4210 }
4211 lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
/* only structures allocated by nfs_request_create() carry R_ALLOCATED */
4212 if (req->r_flags & R_ALLOCATED) {
4213 FREE_ZONE(req, sizeof(*req), M_NFSREQ);
4214 }
4215 }
4216
4217 void
4218 nfs_request_ref(struct nfsreq *req, int locked)
4219 {
4220 if (!locked) {
4221 lck_mtx_lock(&req->r_mtx);
4222 }
4223 if (req->r_refs <= 0) {
4224 panic("nfsreq reference error");
4225 }
4226 req->r_refs++;
4227 if (!locked) {
4228 lck_mtx_unlock(&req->r_mtx);
4229 }
4230 }
4231
4232 void
4233 nfs_request_rele(struct nfsreq *req)
4234 {
4235 int destroy;
4236
4237 lck_mtx_lock(&req->r_mtx);
4238 if (req->r_refs <= 0) {
4239 panic("nfsreq reference underflow");
4240 }
4241 req->r_refs--;
4242 destroy = (req->r_refs == 0);
4243 lck_mtx_unlock(&req->r_mtx);
4244 if (destroy) {
4245 nfs_request_destroy(req);
4246 }
4247 }
4248
4249
4250 /*
4251 * Add an (updated) RPC header with authorization to an NFS request.
4252 */
4253 int
4254 nfs_request_add_header(struct nfsreq *req)
4255 {
4256 struct nfsmount *nmp;
4257 int error = 0;
4258 mbuf_t m;
4259
4260 /* free up any previous header */
4261 if ((m = req->r_mhead)) {
4262 while (m && (m != req->r_mrest)) {
4263 m = mbuf_free(m);
4264 }
4265 req->r_mhead = NULL;
4266 }
4267
4268 nmp = req->r_nmp;
4269 if (nfs_mount_gone(nmp)) {
4270 return ENXIO;
4271 }
4272
4273 error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
4274 if (error) {
4275 return error;
4276 }
4277
4278 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
4279 nmp = req->r_nmp;
4280 if (nfs_mount_gone(nmp)) {
4281 return ENXIO;
4282 }
4283 lck_mtx_lock(&nmp->nm_lock);
4284 if (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) {
4285 req->r_retry = nmp->nm_retry;
4286 } else {
4287 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
4288 }
4289 lck_mtx_unlock(&nmp->nm_lock);
4290
4291 return error;
4292 }
4293
4294
4295 /*
4296 * Queue an NFS request up and send it out.
4297 */
4298 int
4299 nfs_request_send(struct nfsreq *req, int wait)
4300 {
4301 struct nfsmount *nmp;
4302 struct timeval now;
4303
4304 lck_mtx_lock(&req->r_mtx);
4305 req->r_flags |= R_SENDING;
4306 lck_mtx_unlock(&req->r_mtx);
4307
4308 lck_mtx_lock(nfs_request_mutex);
4309
4310 nmp = req->r_nmp;
4311 if (nfs_mount_gone(nmp)) {
4312 lck_mtx_unlock(nfs_request_mutex);
4313 return ENXIO;
4314 }
4315
4316 microuptime(&now);
4317 if (!req->r_start) {
4318 req->r_start = now.tv_sec;
4319 req->r_lastmsg = now.tv_sec -
4320 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
4321 }
4322
4323 OSAddAtomic64(1, &nfsstats.rpcrequests);
4324
4325 /*
4326 * Chain request into list of outstanding requests. Be sure
4327 * to put it LAST so timer finds oldest requests first.
4328 * Make sure that the request queue timer is running
4329 * to check for possible request timeout.
4330 */
4331 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
4332 req->r_lflags |= RL_QUEUED;
4333 if (!nfs_request_timer_on) {
4334 nfs_request_timer_on = 1;
4335 nfs_interval_timer_start(nfs_request_timer_call,
4336 NFS_REQUESTDELAY);
4337 }
4338 lck_mtx_unlock(nfs_request_mutex);
4339
4340 /* Send the request... */
4341 return nfs_send(req, wait);
4342 }
4343
4344 /*
4345 * Call nfs_wait_reply() to wait for the reply.
4346 */
4347 void
4348 nfs_request_wait(struct nfsreq *req)
4349 {
4350 req->r_error = nfs_wait_reply(req);
4351 }
4352
4353 /*
4354 * Finish up an NFS request by dequeueing it and
4355 * doing the initial NFS request reply processing.
 *
 * req     request whose r_error / r_nmrep hold the outcome of the wait
 * nmrepp  receives the reply chain, positioned after the RPC header and
 *         NFS status, when the RPC was accepted (for NFSv2 a reply
 *         carrying an NFS error is freed instead of being handed back)
 * status  receives the NFS status word (NFS_OK for the NULL procedure)
 *
 * Returns 0 or an errno.  A return of 0 with R_RESTART set in
 * req->r_flags tells the caller to rebuild and resend the request.
4356 */
4357 int
4358 nfs_request_finish(
4359 struct nfsreq *req,
4360 struct nfsm_chain *nmrepp,
4361 int *status)
4362 {
4363 struct nfsmount *nmp;
4364 mbuf_t mrep;
4365 int verf_type = 0;
4366 uint32_t verf_len = 0;
4367 uint32_t reply_status = 0;
4368 uint32_t rejected_status = 0;
4369 uint32_t auth_status = 0;
4370 uint32_t accepted_status = 0;
4371 struct nfsm_chain nmrep;
4372 int error, clearjbtimeo;
4373
4374 error = req->r_error;
4375
/* NOTE(review): nmrepp is NULL-checked here but dereferenced unconditionally on the RPC_SUCCESS paths below; callers presumably always pass a valid pointer — confirm */
4376 if (nmrepp) {
4377 nmrepp->nmc_mhead = NULL;
4378 }
4379
4380 /* RPC done, unlink the request. */
4381 nfs_reqdequeue(req);
4382
4383 mrep = req->r_nmrep.nmc_mhead;
4384
4385 nmp = req->r_nmp;
4386
4387 if ((req->r_flags & R_CWND) && nmp) {
4388 /*
4389 * Decrement the outstanding request count.
4390 */
4391 req->r_flags &= ~R_CWND;
4392 lck_mtx_lock(&nmp->nm_lock);
4393 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
4394 nmp->nm_sent -= NFS_CWNDSCALE;
4395 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
4396 /* congestion window is open, poke the cwnd queue */
4397 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
4398 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
4399 req2->r_cchain.tqe_next = NFSREQNOLIST;
4400 wakeup(req2);
4401 }
4402 lck_mtx_unlock(&nmp->nm_lock);
4403 }
4404
4405 #if CONFIG_NFS_GSS
4406 if (nfs_request_using_gss(req)) {
4407 /*
4408 * If the request used an RPCSEC_GSS credential
4409 * then reset its sequence number bit in the
4410 * request window.
4411 */
4412 nfs_gss_clnt_rpcdone(req);
4413
4414 /*
4415 * If we need to re-send, go back and re-build the
4416 * request based on a new sequence number.
4417 * Note that we're using the original XID.
4418 */
4419 if (error == EAGAIN) {
4420 req->r_error = 0;
4421 if (mrep) {
4422 mbuf_freem(mrep);
4423 }
4424 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
4425 req->r_nmrep.nmc_mhead = NULL;
4426 req->r_flags |= R_RESTART;
4427 if (error == ENEEDAUTH) {
4428 req->r_xid = 0; // get a new XID
4429 error = 0;
4430 }
4431 goto nfsmout;
4432 }
4433 }
4434 #endif /* CONFIG_NFS_GSS */
4435
4436 /*
4437 * If there was a successful reply, make sure to mark the mount as up.
4438 * If a tprintf message was given (or if this is a timed-out soft mount)
4439 * then post a tprintf message indicating the server is alive again.
4440 */
4441 if (!error) {
4442 if ((req->r_flags & R_TPRINTFMSG) ||
4443 (nmp && (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) &&
4444 ((nmp->nm_state & (NFSSTA_TIMEO | NFSSTA_FORCE | NFSSTA_DEAD)) == NFSSTA_TIMEO))) {
4445 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
4446 } else {
4447 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
4448 }
4449 }
4450 if (!error && !nmp) {
4451 error = ENXIO;
4452 }
4453 nfsmout_if(error);
4454
4455 /*
4456 * break down the RPC header and check if ok
4457 */
/* work on a local copy of the chain; req->r_nmrep keeps pointing at the reply head */
4458 nmrep = req->r_nmrep;
4459 nfsm_chain_get_32(error, &nmrep, reply_status);
4460 nfsmout_if(error);
4461 if (reply_status == RPC_MSGDENIED) {
4462 nfsm_chain_get_32(error, &nmrep, rejected_status);
4463 nfsmout_if(error);
4464 if (rejected_status == RPC_MISMATCH) {
4465 error = ENOTSUP;
4466 goto nfsmout;
4467 }
4468 nfsm_chain_get_32(error, &nmrep, auth_status);
4469 nfsmout_if(error);
4470 switch (auth_status) {
4471 #if CONFIG_NFS_GSS
4472 case RPCSEC_GSS_CREDPROBLEM:
4473 case RPCSEC_GSS_CTXPROBLEM:
4474 /*
4475 * An RPCSEC_GSS cred or context problem.
4476 * We can't use it anymore.
4477 * Restore the args, renew the context
4478 * and set up for a resend.
4479 */
4480 error = nfs_gss_clnt_args_restore(req);
4481 if (error && error != ENEEDAUTH) {
4482 break;
4483 }
4484
4485 if (!error) {
4486 error = nfs_gss_clnt_ctx_renew(req);
4487 if (error) {
4488 break;
4489 }
4490 }
4491 mbuf_freem(mrep);
4492 req->r_nmrep.nmc_mhead = NULL;
4493 req->r_xid = 0; // get a new XID
4494 req->r_flags |= R_RESTART;
4495 goto nfsmout;
4496 #endif /* CONFIG_NFS_GSS */
4497 default:
4498 error = EACCES;
4499 break;
4500 }
4501 goto nfsmout;
4502 }
4503
4504 /* Now check the verifier */
4505 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
4506 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
4507 nfsmout_if(error);
4508
4509 switch (req->r_auth) {
4510 case RPCAUTH_NONE:
4511 case RPCAUTH_SYS:
4512 /* Any AUTH_SYS verifier is ignored */
4513 if (verf_len > 0) {
4514 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
4515 }
4516 nfsm_chain_get_32(error, &nmrep, accepted_status);
4517 break;
4518 #if CONFIG_NFS_GSS
4519 case RPCAUTH_KRB5:
4520 case RPCAUTH_KRB5I:
4521 case RPCAUTH_KRB5P:
4522 error = nfs_gss_clnt_verf_get(req, &nmrep,
4523 verf_type, verf_len, &accepted_status);
4524 break;
4525 #endif /* CONFIG_NFS_GSS */
4526 }
4527 nfsmout_if(error);
4528
4529 switch (accepted_status) {
4530 case RPC_SUCCESS:
4531 if (req->r_procnum == NFSPROC_NULL) {
4532 /*
4533 * The NFS null procedure is unique,
4534 * in not returning an NFS status.
4535 */
4536 *status = NFS_OK;
4537 } else {
4538 nfsm_chain_get_32(error, &nmrep, *status);
4539 nfsmout_if(error);
4540 }
4541
4542 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
4543 /*
4544 * It's a JUKEBOX error - delay and try again
4545 */
4546 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
4547
4548 mbuf_freem(mrep);
4549 req->r_nmrep.nmc_mhead = NULL;
4550 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
4551 /* we're not yet completely mounted and */
4552 /* we can't complete an RPC, so we fail */
4553 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4554 nfs_softterm(req);
4555 error = req->r_error;
4556 goto nfsmout;
4557 }
/* back off: start at NFS_TRYLATERDEL, double on each retry, cap at 30 */
4558 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
4559 if (req->r_delay > 30) {
4560 req->r_delay = 30;
4561 }
4562 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
4563 if (!(req->r_flags & R_JBTPRINTFMSG)) {
4564 req->r_flags |= R_JBTPRINTFMSG;
4565 lck_mtx_lock(&nmp->nm_lock);
4566 nmp->nm_jbreqs++;
4567 lck_mtx_unlock(&nmp->nm_lock);
4568 }
4569 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
4570 "resource temporarily unavailable (jukebox)", 0);
4571 }
4572 if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (req->r_delay == 30) &&
4573 !(req->r_flags & R_NOINTR)) {
4574 /* for soft mounts, just give up after a short while */
4575 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4576 nfs_softterm(req);
4577 error = req->r_error;
4578 goto nfsmout;
4579 }
4580 delay = req->r_delay;
/* requests with a callback only record when to resend; others sleep here in hz-long slices, checking for interruption before each */
4581 if (req->r_callback.rcb_func) {
4582 struct timeval now;
4583 microuptime(&now);
4584 req->r_resendtime = now.tv_sec + delay;
4585 } else {
4586 do {
4587 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
4588 goto nfsmout;
4589 }
4590 tsleep(nfs_request_finish, PSOCK | slpflag, "nfs_jukebox_trylater", hz);
4591 slpflag = 0;
4592 } while (--delay > 0);
4593 }
4594 req->r_xid = 0; // get a new XID
4595 req->r_flags |= R_RESTART;
4596 req->r_start = 0;
4597 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
4598 return 0;
4599 }
4600
/* the server answered with something other than TRYLATER: undo the jukebox accounting */
4601 if (req->r_flags & R_JBTPRINTFMSG) {
4602 req->r_flags &= ~R_JBTPRINTFMSG;
4603 lck_mtx_lock(&nmp->nm_lock);
4604 nmp->nm_jbreqs--;
4605 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4606 lck_mtx_unlock(&nmp->nm_lock);
4607 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
4608 }
4609
4610 #if CONFIG_NFS4
4611 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
4612 /*
4613 * Hmmm... we need to try a different security flavor.
4614 * The first time a request hits this, we will allocate an array
4615 * to track flavors to try. We fill the array with the mount's
4616 * preferred flavors or the server's preferred flavors or just the
4617 * flavors we support.
4618 */
4619 uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
4620 int srvcount, i, j;
4621
4622 /* Call SECINFO to try to get list of flavors from server. */
4623 srvcount = NX_MAX_SEC_FLAVORS;
4624 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);
4625
4626 if (!req->r_wrongsec) {
4627 /* first time... set up flavor array */
4628 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS * sizeof(uint32_t), M_TEMP, M_WAITOK);
4629 if (!req->r_wrongsec) {
4630 error = EACCES;
4631 goto nfsmout;
4632 }
4633 i = 0;
4634 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
4635 for (; i < nmp->nm_sec.count; i++) {
4636 req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
4637 }
4638 } else if (srvcount) { /* otherwise use the server's list of flavors */
4639 for (; i < srvcount; i++) {
4640 req->r_wrongsec[i] = srvflavors[i];
4641 }
4642 } else { /* otherwise, just try the flavors we support. */
4643 req->r_wrongsec[i++] = RPCAUTH_KRB5P;
4644 req->r_wrongsec[i++] = RPCAUTH_KRB5I;
4645 req->r_wrongsec[i++] = RPCAUTH_KRB5;
4646 req->r_wrongsec[i++] = RPCAUTH_SYS;
4647 req->r_wrongsec[i++] = RPCAUTH_NONE;
4648 }
4649 for (; i < NX_MAX_SEC_FLAVORS; i++) { /* invalidate any remaining slots */
4650 req->r_wrongsec[i] = RPCAUTH_INVALID;
4651 }
4652 }
4653
4654 /* clear the current flavor from the list */
4655 for (i = 0; i < NX_MAX_SEC_FLAVORS; i++) {
4656 if (req->r_wrongsec[i] == req->r_auth) {
4657 req->r_wrongsec[i] = RPCAUTH_INVALID;
4658 }
4659 }
4660
4661 /* find the next flavor to try */
4662 for (i = 0; i < NX_MAX_SEC_FLAVORS; i++) {
4663 if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
4664 if (!srvcount) { /* no server list, just try it */
4665 break;
4666 }
4667 /* check that it's in the server's list */
4668 for (j = 0; j < srvcount; j++) {
4669 if (req->r_wrongsec[i] == srvflavors[j]) {
4670 break;
4671 }
4672 }
4673 if (j < srvcount) { /* found */
4674 break;
4675 }
4676 /* not found in server list */
4677 req->r_wrongsec[i] = RPCAUTH_INVALID;
4678 }
4679 }
4680 if (i == NX_MAX_SEC_FLAVORS) {
4681 /* nothing left to try! */
4682 error = EACCES;
4683 goto nfsmout;
4684 }
4685
4686 /* retry with the next auth flavor */
4687 req->r_auth = req->r_wrongsec[i];
4688 req->r_xid = 0; // get a new XID
4689 req->r_flags |= R_RESTART;
4690 req->r_start = 0;
4691 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
4692 return 0;
4693 }
4694 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
4695 /*
4696 * We renegotiated security for this request; so update the
4697 * default security flavor for the associated node.
4698 */
4699 if (req->r_np) {
4700 req->r_np->n_auth = req->r_auth;
4701 }
4702 }
4703 #endif /* CONFIG_NFS4 */
4704 if (*status == NFS_OK) {
4705 /*
4706 * Successful NFS request
4707 */
/* hand the reply chain to the caller; the request no longer owns it */
4708 *nmrepp = nmrep;
4709 req->r_nmrep.nmc_mhead = NULL;
4710 break;
4711 }
4712 /* Got an NFS error of some kind */
4713
4714 /*
4715 * If the File Handle was stale, invalidate the
4716 * lookup cache, just in case.
4717 */
4718 if ((*status == ESTALE) && req->r_np) {
4719 cache_purge(NFSTOV(req->r_np));
4720 /* if monitored, also send delete event */
4721 if (vnode_ismonitored(NFSTOV(req->r_np))) {
4722 nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB | VNODE_EVENT_DELETE));
4723 }
4724 }
/* for NFSv2 the error reply is freed here; for later versions it is still returned to the caller */
4725 if (nmp->nm_vers == NFS_VER2) {
4726 mbuf_freem(mrep);
4727 } else {
4728 *nmrepp = nmrep;
4729 }
4730 req->r_nmrep.nmc_mhead = NULL;
4731 error = 0;
4732 break;
4733 case RPC_PROGUNAVAIL:
4734 error = EPROGUNAVAIL;
4735 break;
4736 case RPC_PROGMISMATCH:
4737 error = ERPCMISMATCH;
4738 break;
4739 case RPC_PROCUNAVAIL:
4740 error = EPROCUNAVAIL;
4741 break;
4742 case RPC_GARBAGE:
4743 error = EBADRPC;
4744 break;
4745 case RPC_SYSTEM_ERR:
4746 default:
4747 error = EIO;
4748 break;
4749 }
4750 nfsmout:
/* NOTE(review): nmp is dereferenced below without a NULL check although earlier code allows for nmp == NULL — confirm r_nmp cannot be NULL while R_JBTPRINTFMSG is set */
4751 if (req->r_flags & R_JBTPRINTFMSG) {
4752 req->r_flags &= ~R_JBTPRINTFMSG;
4753 lck_mtx_lock(&nmp->nm_lock);
4754 nmp->nm_jbreqs--;
4755 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4756 lck_mtx_unlock(&nmp->nm_lock);
4757 if (clearjbtimeo) {
4758 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
4759 }
4760 }
4761 FSDBG(273, R_XID32(req->r_xid), nmp, req,
4762 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
4763 return error;
4764 }
4765
4766 /*
4767 * NFS request using a GSS/Kerberos security flavor?
4768 */
4769 int
4770 nfs_request_using_gss(struct nfsreq *req)
4771 {
4772 if (!req->r_gss_ctx) {
4773 return 0;
4774 }
4775 switch (req->r_auth) {
4776 case RPCAUTH_KRB5:
4777 case RPCAUTH_KRB5I:
4778 case RPCAUTH_KRB5P:
4779 return 1;
4780 }
4781 return 0;
4782 }
4783
4784 /*
4785 * Perform an NFS request synchronously.
4786 */
4787
4788 int
4789 nfs_request(
4790 nfsnode_t np,
4791 mount_t mp, /* used only if !np */
4792 struct nfsm_chain *nmrest,
4793 int procnum,
4794 vfs_context_t ctx,
4795 struct nfsreq_secinfo_args *si,
4796 struct nfsm_chain *nmrepp,
4797 u_int64_t *xidp,
4798 int *status)
4799 {
4800 return nfs_request2(np, mp, nmrest, procnum,
4801 vfs_context_thread(ctx), vfs_context_ucred(ctx),
4802 si, 0, nmrepp, xidp, status);
4803 }
4804
4805 int
4806 nfs_request2(
4807 nfsnode_t np,
4808 mount_t mp, /* used only if !np */
4809 struct nfsm_chain *nmrest,
4810 int procnum,
4811 thread_t thd,
4812 kauth_cred_t cred,
4813 struct nfsreq_secinfo_args *si,
4814 int flags,
4815 struct nfsm_chain *nmrepp,
4816 u_int64_t *xidp,
4817 int *status)
4818 {
4819 struct nfsreq rq, *req = &rq;
4820 int error;
4821
4822 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req))) {
4823 return error;
4824 }
4825 req->r_flags |= (flags & (R_OPTMASK | R_SOFT));
4826 if (si) {
4827 req->r_secinfo = *si;
4828 }
4829
4830 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
4831 do {
4832 req->r_error = 0;
4833 req->r_flags &= ~R_RESTART;
4834 if ((error = nfs_request_add_header(req))) {
4835 break;
4836 }
4837 if (xidp) {
4838 *xidp = req->r_xid;
4839 }
4840 if ((error = nfs_request_send(req, 1))) {
4841 break;
4842 }
4843 nfs_request_wait(req);
4844 if ((error = nfs_request_finish(req, nmrepp, status))) {
4845 break;
4846 }
4847 } while (req->r_flags & R_RESTART);
4848
4849 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
4850 nfs_request_rele(req);
4851 return error;
4852 }
4853
4854
#if CONFIG_NFS_GSS
/*
 * Issue a NULL-procedure request used to exchange GSS context tokens
 * with the server.  The context being set up or renewed (cp) is
 * attached to the request for the duration of the call.
 *
 * Returns NFSERR_EAUTH if no context was supplied, otherwise 0 or the
 * first error reported while creating, sending or finishing the
 * request.
 */

int
nfs_request_gss(
	mount_t mp,
	struct nfsm_chain *nmrest,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfs_gss_clnt_ctx *cp,   /* Set to gss context to renew or setup */
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsreq stack_req, *req = &stack_req;
	int error;
	int wait;

	error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req);
	if (error) {
		return error;
	}
	req->r_flags |= (flags & R_OPTMASK);

	if (cp == NULL) {
		printf("nfs_request_gss request has no context\n");
		nfs_request_rele(req);
		return NFSERR_EAUTH;
	}
	nfs_gss_clnt_ctx_ref(req, cp);

	/*
	 * A context destroy is only advisory: do not wait for its reply,
	 * so that a dead server cannot hang us.
	 */
	wait = (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY) ? 0 : 1;

	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
	for (;;) {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		error = nfs_request_add_header(req);
		if (error) {
			break;
		}

		error = nfs_request_send(req, wait);
		if (error || !wait) {
			break;
		}

		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
		if (error || !(req->r_flags & R_RESTART)) {
			break;
		}
	}

	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);

	nfs_gss_clnt_ctx_unref(req);
	nfs_request_rele(req);

	return error;
}
#endif /* CONFIG_NFS_GSS */
4925
4926 /*
4927 * Create and start an asynchronous NFS request.
 *
 * The request is created via nfs_request_create() (allocated when
 * *reqp is NULL), marked R_ASYNC, given the optional secinfo and
 * callback, has its header built and is then sent.  When a callback is
 * set, an extra reference is taken before sending and this function
 * sleeps on the request until R_SENT is set, the wait is interrupted,
 * or a send fails.  While waiting, a request that sits on the resend
 * queue of a mount in NFSSTA_RECOVER is taken off the queue and sent
 * from here.
 *
 * Before returning, one reference is dropped if an error occurred or a
 * callback is set; one more is dropped when a callback request failed
 * without R_SENT being set.
 *
 * Returns 0 or the first error encountered.
4928 */
4929 int
4930 nfs_request_async(
4931 nfsnode_t np,
4932 mount_t mp, /* used only if !np */
4933 struct nfsm_chain *nmrest,
4934 int procnum,
4935 thread_t thd,
4936 kauth_cred_t cred,
4937 struct nfsreq_secinfo_args *si,
4938 int flags,
4939 struct nfsreq_cbinfo *cb,
4940 struct nfsreq **reqp)
4941 {
4942 struct nfsreq *req;
4943 struct nfsmount *nmp;
4944 int error, sent;
4945
4946 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
4947 req = *reqp;
4948 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
4949 if (error) {
4950 return error;
4951 }
4952 req->r_flags |= (flags & R_OPTMASK);
4953 req->r_flags |= R_ASYNC;
4954 if (si) {
4955 req->r_secinfo = *si;
4956 }
4957 if (cb) {
4958 req->r_callback = *cb;
4959 }
4960 error = nfs_request_add_header(req);
4961 if (!error) {
4962 req->r_flags |= R_WAITSENT;
/* extra reference for requests that complete through a callback */
4963 if (req->r_callback.rcb_func) {
4964 nfs_request_ref(req, 0);
4965 }
4966 error = nfs_request_send(req, 1);
4967 lck_mtx_lock(&req->r_mtx);
4968 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
4969 /* make sure to wait until this async I/O request gets sent */
4970 int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
4971 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
4972 while (!(req->r_flags & R_SENT)) {
4973 nmp = req->r_nmp;
4974 if ((req->r_flags & R_RESENDQ) && !nfs_mount_gone(nmp)) {
4975 lck_mtx_lock(&nmp->nm_lock);
4976 if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
4977 /*
4978 * It's not going to get off the resend queue if we're in recovery.
4979 * So, just take it off ourselves. We could be holding mount state
4980 * busy and thus holding up the start of recovery.
4981 */
4982 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
4983 req->r_rchain.tqe_next = NFSREQNOLIST;
4984 if (req->r_flags & R_RESENDQ) {
4985 req->r_flags &= ~R_RESENDQ;
4986 }
4987 lck_mtx_unlock(&nmp->nm_lock);
/* r_mtx is released around nfs_send() and re-taken afterwards */
4988 req->r_flags |= R_SENDING;
4989 lck_mtx_unlock(&req->r_mtx);
4990 error = nfs_send(req, 1);
4991 /* Remove the R_RESENDQ reference */
4992 nfs_request_rele(req);
4993 lck_mtx_lock(&req->r_mtx);
4994 if (error) {
4995 break;
4996 }
4997 continue;
4998 }
4999 lck_mtx_unlock(&nmp->nm_lock);
5000 }
5001 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
5002 break;
5003 }
/* only the first sleep uses slpflag; it is zeroed right after */
5004 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
5005 slpflag = 0;
5006 }
5007 }
/* capture R_SENT while r_mtx is still held */
5008 sent = req->r_flags & R_SENT;
5009 lck_mtx_unlock(&req->r_mtx);
/* failed before being sent: give back the reference taken above for the callback */
5010 if (error && req->r_callback.rcb_func && !sent) {
5011 nfs_request_rele(req);
5012 }
5013 }
5014 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
5015 if (error || req->r_callback.rcb_func) {
5016 nfs_request_rele(req);
5017 }
5018
5019 return error;
5020 }
5021
5022 /*
5023 * Wait for and finish an asynchronous NFS request.
 *
 * First waits for the request to leave the mount's resend queue (removing
 * it directly if the mount is in recovery), then waits for the reply and
 * calls nfs_request_finish(). If the request is flagged R_RESTART it is
 * re-sent: requests without a callback are re-sent and waited for in a
 * loop here; requests with a callback are re-queued or re-sent and
 * EINPROGRESS is returned without releasing the request.
 *
 * nmrepp and status are passed through to nfs_request_finish(). If xidp is
 * non-NULL it receives req->r_xid. On every return other than EINPROGRESS
 * one reference on req is released.
5024 */
5025 int
5026 nfs_request_async_finish(
5027 struct nfsreq *req,
5028 struct nfsm_chain *nmrepp,
5029 u_int64_t *xidp,
5030 int *status)
5031 {
/* asyncio is 1 when the request has a completion callback */
5032 int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
5033 struct nfsmount *nmp;
5034
5035 lck_mtx_lock(&req->r_mtx);
5036 if (!asyncio) {
5037 req->r_flags |= R_ASYNCWAIT;
5038 }
5039 while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */
5040 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
5041
5042 if ((nmp = req->r_nmp)) {
5043 lck_mtx_lock(&nmp->nm_lock);
5044 if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
5045 /*
5046 * It's not going to get off the resend queue if we're in recovery.
5047 * So, just take it off ourselves. We could be holding mount state
5048 * busy and thus holding up the start of recovery.
5049 */
5050 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
5051 req->r_rchain.tqe_next = NFSREQNOLIST;
5052 if (req->r_flags & R_RESENDQ) {
5053 req->r_flags &= ~R_RESENDQ;
5054 }
5055 /* Remove the R_RESENDQ reference */
5056 assert(req->r_refs > 0);
5057 req->r_refs--;
5058 lck_mtx_unlock(&nmp->nm_lock);
5059 break;
5060 }
5061 lck_mtx_unlock(&nmp->nm_lock);
5062 }
5063 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
5064 break;
5065 }
/* sleep up to 2 seconds, then re-check R_RESENDQ */
5066 msleep(req, &req->r_mtx, PZERO - 1, "nfsresendqwait", &ts);
5067 }
5068 lck_mtx_unlock(&req->r_mtx);
5069
5070 if (!error) {
5071 nfs_request_wait(req);
5072 error = nfs_request_finish(req, nmrepp, status);
5073 }
5074
/* the finish step may have flagged the request for a restart */
5075 while (!error && (req->r_flags & R_RESTART)) {
5076 if (asyncio) {
5077 assert(req->r_achain.tqe_next == NFSREQNOLIST);
5078 lck_mtx_lock(&req->r_mtx);
5079 req->r_flags &= ~R_IOD;
5080 if (req->r_resendtime) { /* send later */
5081 nfs_asyncio_resend(req);
5082 lck_mtx_unlock(&req->r_mtx);
/* returns without releasing req */
5083 return EINPROGRESS;
5084 }
5085 lck_mtx_unlock(&req->r_mtx);
5086 }
5087 req->r_error = 0;
5088 req->r_flags &= ~R_RESTART;
5089 if ((error = nfs_request_add_header(req))) {
5090 break;
5091 }
5092 if ((error = nfs_request_send(req, !asyncio))) {
5093 break;
5094 }
5095 if (asyncio) {
/* callback request was re-sent; returns without releasing req */
5096 return EINPROGRESS;
5097 }
5098 nfs_request_wait(req);
5099 if ((error = nfs_request_finish(req, nmrepp, status))) {
5100 break;
5101 }
5102 }
5103 if (xidp) {
5104 *xidp = req->r_xid;
5105 }
5106
5107 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
5108 nfs_request_rele(req);
5109 return error;
5110 }
5111
5112 /*
5113 * Cancel a pending asynchronous NFS request.
5114 */
5115 void
5116 nfs_request_async_cancel(struct nfsreq *req)
5117 {
5118 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
5119 nfs_request_rele(req);
5120 }
5121
5122 /*
5123 * Flag a request as being terminated.
5124 */
5125 void
5126 nfs_softterm(struct nfsreq *req)
5127 {
5128 struct nfsmount *nmp = req->r_nmp;
5129 req->r_flags |= R_SOFTTERM;
5130 req->r_error = ETIMEDOUT;
5131 if (!(req->r_flags & R_CWND) || nfs_mount_gone(nmp)) {
5132 return;
5133 }
5134 /* update congestion window */
5135 req->r_flags &= ~R_CWND;
5136 lck_mtx_lock(&nmp->nm_lock);
5137 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
5138 nmp->nm_sent -= NFS_CWNDSCALE;
5139 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
5140 /* congestion window is open, poke the cwnd queue */
5141 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
5142 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
5143 req2->r_cchain.tqe_next = NFSREQNOLIST;
5144 wakeup(req2);
5145 }
5146 lck_mtx_unlock(&nmp->nm_lock);
5147 }
5148
5149 /*
5150 * Ensure req isn't in use by the timer, then dequeue it.
5151 */
5152 void
5153 nfs_reqdequeue(struct nfsreq *req)
5154 {
5155 lck_mtx_lock(nfs_request_mutex);
5156 while (req->r_lflags & RL_BUSY) {
5157 req->r_lflags |= RL_WAITING;
5158 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
5159 }
5160 if (req->r_lflags & RL_QUEUED) {
5161 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
5162 req->r_lflags &= ~RL_QUEUED;
5163 }
5164 lck_mtx_unlock(nfs_request_mutex);
5165 }
5166
5167 /*
5168 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
5169 * free()'d out from under it.
5170 */
5171 void
5172 nfs_reqbusy(struct nfsreq *req)
5173 {
5174 if (req->r_lflags & RL_BUSY) {
5175 panic("req locked");
5176 }
5177 req->r_lflags |= RL_BUSY;
5178 }
5179
5180 /*
5181 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
5182 */
5183 struct nfsreq *
5184 nfs_reqnext(struct nfsreq *req)
5185 {
5186 struct nfsreq * nextreq;
5187
5188 if (req == NULL) {
5189 return NULL;
5190 }
5191 /*
5192 * We need to get and busy the next req before signalling the
5193 * current one, otherwise wakeup() may block us and we'll race to
5194 * grab the next req.
5195 */
5196 nextreq = TAILQ_NEXT(req, r_chain);
5197 if (nextreq != NULL) {
5198 nfs_reqbusy(nextreq);
5199 }
5200 /* unbusy and signal. */
5201 req->r_lflags &= ~RL_BUSY;
5202 if (req->r_lflags & RL_WAITING) {
5203 req->r_lflags &= ~RL_WAITING;
5204 wakeup(&req->r_lflags);
5205 }
5206 return nextreq;
5207 }
5208
5209 /*
5210 * NFS request queue timer routine
5211 *
5212 * Scan the NFS request queue for any requests that have timed out.
5213 *
5214 * Alert the system of unresponsive servers.
5215 * Mark expired requests on soft mounts as terminated.
5216 * For UDP, mark/signal requests for retransmission.
 *
 * The scan runs with nfs_request_mutex held; the request currently being
 * examined is kept marked busy (nfs_reqbusy/nfs_reqnext) and its r_mtx is
 * taken while its state is inspected or changed. Mounts whose socket
 * should be poked are collected on a local queue and poked after
 * nfs_request_mutex has been dropped. If the request queue is empty the
 * timer is turned off (nfs_request_timer_on = 0); otherwise the routine
 * re-arms itself via nfs_interval_timer_start() before returning.
5217 */
5218 void
5219 nfs_request_timer(__unused void *param0, __unused void *param1)
5220 {
5221 struct nfsreq *req;
5222 struct nfsmount *nmp;
5223 int timeo, maxtime, finish_asyncio, error;
5224 struct timeval now;
5225 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
5226 TAILQ_INIT(&nfs_mount_poke_queue);
5227
/*
 * NOTE(review): the poke queue is initialized once, before this label, so
 * mounts queued before a "goto restart" remain queued (with the reference
 * taken when they were queued). If the re-scan finds the request queue
 * empty, the early return below skips the poke loop at the bottom of the
 * function — confirm those mounts cannot be left queued at that point.
 */
5228 restart:
5229 lck_mtx_lock(nfs_request_mutex);
5230 req = TAILQ_FIRST(&nfs_reqq);
5231 if (req == NULL) { /* no requests - turn timer off */
5232 nfs_request_timer_on = 0;
5233 lck_mtx_unlock(nfs_request_mutex);
5234 return;
5235 }
5236
/* busy the first request; nfs_reqnext() moves the busy mark along the chain */
5237 nfs_reqbusy(req);
5238
5239 microuptime(&now);
5240 for (; req != NULL; req = nfs_reqnext(req)) {
5241 nmp = req->r_nmp;
5242 if (nmp == NULL) {
5243 NFS_SOCK_DBG("Found a request with out a mount!\n");
5244 continue;
5245 }
/* skip requests that already carry an error or (presumably) a reply mbuf chain */
5246 if (req->r_error || req->r_nmrep.nmc_mhead) {
5247 continue;
5248 }
5249 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
5250 if (req->r_callback.rcb_func != NULL) {
5251 /* async I/O RPC needs to be finished */
5252 lck_mtx_lock(&req->r_mtx);
5253 req->r_error = error;
/* if R_WAITSENT is set the request is not finished from here */
5254 finish_asyncio = !(req->r_flags & R_WAITSENT);
5255 wakeup(req);
5256 lck_mtx_unlock(&req->r_mtx);
5257 if (finish_asyncio) {
5258 nfs_asyncio_finish(req);
5259 }
5260 }
5261 continue;
5262 }
5263
5264 lck_mtx_lock(&req->r_mtx);
5265
/*
 * Report "not responding" once the request has been retransmitted more
 * than twice (or hit a resend error) and the per-mount message delay has
 * elapsed since the last message for this request.
 */
5266 if (nmp->nm_tprintf_initial_delay &&
5267 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
5268 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
5269 req->r_lastmsg = now.tv_sec;
5270 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
5271 "not responding", 1);
5272 req->r_flags |= R_TPRINTFMSG;
5273 lck_mtx_lock(&nmp->nm_lock);
5274 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
5275 lck_mtx_unlock(&nmp->nm_lock);
5276 /* we're not yet completely mounted and */
5277 /* we can't complete an RPC, so we fail */
5278 OSAddAtomic64(1, &nfsstats.rpctimeouts);
5279 nfs_softterm(req);
5280 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
5281 wakeup(req);
5282 lck_mtx_unlock(&req->r_mtx);
5283 if (finish_asyncio) {
5284 nfs_asyncio_finish(req);
5285 }
5286 continue;
5287 }
5288 lck_mtx_unlock(&nmp->nm_lock);
5289 }
5290
5291 /*
5292 * Put a reasonable limit on the maximum timeout,
5293 * and reduce that limit when soft mounts get timeouts or are in reconnect.
5294 */
5295 if (!(NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && !nfs_can_squish(nmp)) {
5296 maxtime = NFS_MAXTIMEO;
5297 } else if ((req->r_flags & (R_SETUP | R_RECOVER)) ||
5298 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8))) {
5299 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts + 1)) / 2;
5300 } else {
5301 maxtime = NFS_MINTIMEO / 4;
5302 }
5303
5304 /*
5305 * Check for request timeout.
 * A negative r_rtt skips this whole section; otherwise r_rtt is advanced
 * by one on each pass and compared against the computed timeout.
5306 */
5307 if (req->r_rtt >= 0) {
5308 req->r_rtt++;
5309 lck_mtx_lock(&nmp->nm_lock);
5310 if (req->r_flags & R_RESENDERR) {
5311 /* with resend errors, retry every few seconds */
5312 timeo = 4 * hz;
5313 } else {
5314 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL) {
5315 timeo = NFS_MINIDEMTIMEO; // gss context setup
5316 } else if (NMFLAG(nmp, DUMBTIMER)) {
5317 timeo = nmp->nm_timeo;
5318 } else {
5319 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
5320 }
5321
5322 /* ensure 62.5 ms floor */
5323 while (16 * timeo < hz) {
5324 timeo *= 2;
5325 }
/* scale by the backoff table entry for the mount's current timeout count */
5326 if (nmp->nm_timeouts > 0) {
5327 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
5328 }
5329 }
5330 /* limit timeout to max */
5331 if (timeo > maxtime) {
5332 timeo = maxtime;
5333 }
5334 if (req->r_rtt <= timeo) {
5335 NFS_SOCK_DBG("nfs timeout: req time %d and timeo is %d continue\n", req->r_rtt, timeo);
5336 lck_mtx_unlock(&nmp->nm_lock);
5337 lck_mtx_unlock(&req->r_mtx);
5338 continue;
5339 }
5340 /* The request has timed out */
5341 NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
5342 req->r_procnum, proct[req->r_procnum],
5343 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
5344 (now.tv_sec - req->r_start) * NFS_HZ, maxtime);
/* the per-mount timeout count is capped at 8 */
5345 if (nmp->nm_timeouts < 8) {
5346 nmp->nm_timeouts++;
5347 }
5348 if (nfs_mount_check_dead_timeout(nmp)) {
5349 /* Unbusy this request */
5350 req->r_lflags &= ~RL_BUSY;
5351 if (req->r_lflags & RL_WAITING) {
5352 req->r_lflags &= ~RL_WAITING;
5353 wakeup(&req->r_lflags);
5354 }
5355 lck_mtx_unlock(&req->r_mtx);
5356
5357 /* No need to poke this mount */
/*
 * NOTE(review): the reference taken when the mount was put on the poke
 * queue is not dropped on this removal path — TODO confirm it is released
 * elsewhere.
 */
5358 if (nmp->nm_sockflags & NMSOCK_POKE) {
5359 nmp->nm_sockflags &= ~NMSOCK_POKE;
5360 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
5361 }
5362 /* Release our lock state, so we can become a zombie */
5363 lck_mtx_unlock(nfs_request_mutex);
5364
5365 /*
5366 * Note nfs_mount_make zombie(nmp) must be
5367 * called with nm_lock held. After doing some
5368 * work we release nm_lock in
5369 * nfs_make_mount_zombie with out acquiring any
5370 * other locks. (Later, in nfs_mount_zombie we
5371 * will acquire nfs_request_mutex, r_mtx,
5372 * nm_lock in that order). So we should not be
5373 * introducing deadlock here. We take a reference
5374 * on the mount so that its still there when we
5375 * release the lock.
5376 */
5377 nmp->nm_ref++;
5378 nfs_mount_make_zombie(nmp);
5379 lck_mtx_unlock(&nmp->nm_lock);
5380 nfs_mount_rele(nmp);
5381
5382 /*
5383 * All the request for this mount have now been
5384 * removed from the request queue. Restart to
5385 * process the remaining mounts
5386 */
5387 goto restart;
5388 }
5389
5390 /* if it's been a few seconds, try poking the socket */
5391 if ((nmp->nm_sotype == SOCK_STREAM) &&
5392 ((now.tv_sec - req->r_start) >= 3) &&
5393 !(nmp->nm_sockflags & (NMSOCK_POKE | NMSOCK_UNMOUNT)) &&
5394 (nmp->nm_sockflags & NMSOCK_READY)) {
5395 nmp->nm_sockflags |= NMSOCK_POKE;
5396 /*
5397 * We take a ref on the mount so that we know the mount will still be there
5398 * when we process the nfs_mount_poke_queue. An unmount request will block
5399 * in nfs_mount_drain_and_cleanup until after the poke is finished. We release
5400 * the reference after calling nfs_sock_poke below;
5401 */
5402 nmp->nm_ref++;
5403 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
5404 }
5405 lck_mtx_unlock(&nmp->nm_lock);
5406 }
5407
5408 /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
5409 if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP | R_RECOVER | R_SOFT))) &&
5410 ((req->r_rexmit >= req->r_retry) || /* too many */
5411 ((now.tv_sec - req->r_start) * NFS_HZ > maxtime))) { /* too long */
5412 OSAddAtomic64(1, &nfsstats.rpctimeouts);
5413 lck_mtx_lock(&nmp->nm_lock);
5414 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
5415 lck_mtx_unlock(&nmp->nm_lock);
5416 /* make sure we note the unresponsive server */
5417 /* (maxtime may be less than tprintf delay) */
5418 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
5419 "not responding", 1);
5420 req->r_lastmsg = now.tv_sec;
5421 req->r_flags |= R_TPRINTFMSG;
5422 } else {
5423 lck_mtx_unlock(&nmp->nm_lock);
5424 }
5425 if (req->r_flags & R_NOINTR) {
5426 /* don't terminate nointr requests on timeout */
5427 lck_mtx_unlock(&req->r_mtx);
5428 continue;
5429 }
5430 NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
5431 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
5432 now.tv_sec - req->r_start);
5433 nfs_softterm(req);
5434 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
5435 wakeup(req);
5436 lck_mtx_unlock(&req->r_mtx);
5437 if (finish_asyncio) {
5438 nfs_asyncio_finish(req);
5439 }
5440 continue;
5441 }
5442
5443 /* for TCP, only resend if explicitly requested */
5444 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
/* count the timeout as a retransmit (capped) and restart the timer */
5445 if (++req->r_rexmit > NFS_MAXREXMIT) {
5446 req->r_rexmit = NFS_MAXREXMIT;
5447 }
5448 req->r_rtt = 0;
5449 lck_mtx_unlock(&req->r_mtx);
5450 continue;
5451 }
5452
5453 /*
5454 * The request needs to be (re)sent. Kick the requester to resend it.
5455 * (unless it's already marked as needing a resend)
5456 */
5457 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
5458 lck_mtx_unlock(&req->r_mtx);
5459 continue;
5460 }
5461 NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
5462 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
5463 req->r_flags |= R_MUSTRESEND;
5464 req->r_rtt = -1;
5465 wakeup(req);
/* queue an async resend only when R_ASYNC is the sole flag set among these four */
5466 if ((req->r_flags & (R_IOD | R_ASYNC | R_ASYNCWAIT | R_SENDING)) == R_ASYNC) {
5467 nfs_asyncio_resend(req);
5468 }
5469 lck_mtx_unlock(&req->r_mtx);
5470 }
5471
5472 lck_mtx_unlock(nfs_request_mutex);
5473
5474 /* poke any sockets */
5475 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
5476 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
5477 nfs_sock_poke(nmp);
5478 nfs_mount_rele(nmp);
5479 }
5480
/* re-arm the timer for the next scan */
5481 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
5482 }
5483
5484 /*
5485 * check a thread's proc for the "noremotehang" flag.
5486 */
5487 int
5488 nfs_noremotehang(thread_t thd)
5489 {
5490 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
5491 return p && proc_noremotehang(p);
5492 }
5493
5494 /*
5495 * Test for a termination condition pending on the process.
5496 * This is used to determine if we need to bail on a mount.
5497 * ETIMEDOUT is returned if there has been a soft timeout.
5498 * EINTR is returned if there is a signal pending that is not being ignored
5499 * and the mount is interruptable, or if we are a thread that is in the process
5500 * of cancellation (also SIGKILL posted).
5501 */
5502 extern int sigprop[NSIG + 1];
5503 int
5504 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
5505 {
5506 proc_t p;
5507 int error = 0;
5508
5509 if (!nmp) {
5510 return ENXIO;
5511 }
5512
5513 if (req && (req->r_flags & R_SOFTTERM)) {
5514 return ETIMEDOUT; /* request has been terminated. */
5515 }
5516 if (req && (req->r_flags & R_NOINTR)) {
5517 thd = NULL; /* don't check for signal on R_NOINTR */
5518 }
5519 if (!nmplocked) {
5520 lck_mtx_lock(&nmp->nm_lock);
5521 }
5522 if (nmp->nm_state & NFSSTA_FORCE) {
5523 /* If a force unmount is in progress then fail. */
5524 error = EIO;
5525 } else if (vfs_isforce(nmp->nm_mountp)) {
5526 /* Someone is unmounting us, go soft and mark it. */
5527 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
5528 nmp->nm_state |= NFSSTA_FORCE;
5529 }
5530
5531 /* Check if the mount is marked dead. */
5532 if (!error && (nmp->nm_state & NFSSTA_DEAD)) {
5533 error = ENXIO;
5534 }
5535
5536 /*
5537 * If the mount is hung and we've requested not to hang
5538 * on remote filesystems, then bail now.
5539 */
5540 if (current_proc() != kernproc &&
5541 !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd)) {
5542 error = EIO;
5543 }
5544
5545 if (!nmplocked) {
5546 lck_mtx_unlock(&nmp->nm_lock);
5547 }
5548 if (error) {
5549 return error;
5550 }
5551
5552 /* may not have a thread for async I/O */
5553 if (thd == NULL || current_proc() == kernproc) {
5554 return 0;
5555 }
5556
5557 /*
5558 * Check if the process is aborted, but don't interrupt if we
5559 * were killed by a signal and this is the exiting thread which
5560 * is attempting to dump core.
5561 */
5562 if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
5563 (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
5564 (p->p_sigacts == NULL) ||
5565 (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
5566 !(sigprop[p->p_sigacts->ps_sig] & SA_CORE))) {
5567 return EINTR;
5568 }
5569
5570 /* mask off thread and process blocked signals. */
5571 if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
5572 proc_pendingsignals(p, NFSINT_SIGMASK)) {
5573 return EINTR;
5574 }
5575 return 0;
5576 }
5577
5578 /*
5579 * Lock a socket against others.
5580 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
5581 * and also to avoid race conditions between the processes with nfs requests
5582 * in progress when a reconnect is necessary.
5583 */
5584 int
5585 nfs_sndlock(struct nfsreq *req)
5586 {
5587 struct nfsmount *nmp = req->r_nmp;
5588 int *statep;
5589 int error = 0, slpflag = 0;
5590 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
5591
5592 if (nfs_mount_gone(nmp)) {
5593 return ENXIO;
5594 }
5595
5596 lck_mtx_lock(&nmp->nm_lock);
5597 statep = &nmp->nm_state;
5598
5599 if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) {
5600 slpflag = PCATCH;
5601 }
5602 while (*statep & NFSSTA_SNDLOCK) {
5603 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1))) {
5604 break;
5605 }
5606 *statep |= NFSSTA_WANTSND;
5607 if (nfs_noremotehang(req->r_thread)) {
5608 ts.tv_sec = 1;
5609 }
5610 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
5611 if (slpflag == PCATCH) {
5612 slpflag = 0;
5613 ts.tv_sec = 2;
5614 }
5615 }
5616 if (!error) {
5617 *statep |= NFSSTA_SNDLOCK;
5618 }
5619 lck_mtx_unlock(&nmp->nm_lock);
5620 return error;
5621 }
5622
5623 /*
5624 * Unlock the stream socket for others.
5625 */
5626 void
5627 nfs_sndunlock(struct nfsreq *req)
5628 {
5629 struct nfsmount *nmp = req->r_nmp;
5630 int *statep, wake = 0;
5631
5632 if (!nmp) {
5633 return;
5634 }
5635 lck_mtx_lock(&nmp->nm_lock);
5636 statep = &nmp->nm_state;
5637 if ((*statep & NFSSTA_SNDLOCK) == 0) {
5638 panic("nfs sndunlock");
5639 }
5640 *statep &= ~(NFSSTA_SNDLOCK | NFSSTA_SENDING);
5641 if (*statep & NFSSTA_WANTSND) {
5642 *statep &= ~NFSSTA_WANTSND;
5643 wake = 1;
5644 }
5645 lck_mtx_unlock(&nmp->nm_lock);
5646 if (wake) {
5647 wakeup(statep);
5648 }
5649 }
5650
/*
 * Send a single pre-built RPC request (mreq) to saddr and wait for the reply.
 *
 * If so is NULL a temporary socket of type sotype is created (optionally
 * bound to the low/reserved port range when bindresv is set, and connected
 * for stream sockets) and is shut down and closed before returning. If so
 * is supplied, its send/receive timeouts are temporarily replaced with a
 * one second timeout and restored on exit.
 *
 * The loop makes up to timeo attempts, each bounded by the one second
 * socket timeout. The request is sent on the first attempt and, for
 * datagram sockets, re-sent at a doubling attempt interval (capped at 30).
 * A reply is checked against xid and parsed into nmrep; the RPC reply and
 * accept status are mapped to an errno value.
 *
 * mreq is always freed before returning. The reply mbuf chain is attached
 * to nmrep; this function does not free it (presumably the caller cleans
 * up nmrep — confirm against callers).
 *
 * NOTE(review): for stream sockets the record state (nrrs) is initialized
 * here but no cleanup call for it is visible in this function — confirm
 * nothing is left allocated when the loop times out mid-record.
 */
5651 int
5652 nfs_aux_request(
5653 struct nfsmount *nmp,
5654 thread_t thd,
5655 struct sockaddr *saddr,
5656 socket_t so,
5657 int sotype,
5658 mbuf_t mreq,
5659 uint32_t xid,
5660 int bindresv,
5661 int timeo,
5662 struct nfsm_chain *nmrep)
5663 {
5664 int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
5665 socket_t newso = NULL;
5666 struct sockaddr_storage ss;
5667 struct timeval orig_rcvto, orig_sndto, tv = { .tv_sec = 1, .tv_usec = 0 };
5668 mbuf_t m, mrep = NULL;
5669 struct msghdr msg;
5670 uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
5671 uint32_t verf_type, verf_len, accepted_status;
5672 size_t readlen, sentlen;
5673 struct nfs_rpc_record_state nrrs;
5674
5675 if (!so) {
5676 /* create socket and set options */
5677 if (saddr->sa_family == AF_LOCAL) {
5678 soproto = 0;
5679 } else {
5680 soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
5681 }
5682 if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso))) {
5683 goto nfsmout;
5684 }
5685
5686 if (bindresv && saddr->sa_family != AF_LOCAL) {
/* select the low port range, then bind to the wildcard address with port 0 */
5687 int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
5688 int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
5689 int portrange = IP_PORTRANGE_LOW;
5690 error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
5691 nfsmout_if(error);
5692 ss.ss_len = saddr->sa_len;
5693 ss.ss_family = saddr->sa_family;
5694 if (ss.ss_family == AF_INET) {
5695 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
5696 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
5697 } else if (ss.ss_family == AF_INET6) {
5698 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
5699 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
5700 } else {
5701 error = EINVAL;
5702 }
5703 if (!error) {
5704 error = sock_bind(newso, (struct sockaddr *)&ss);
5705 }
5706 nfsmout_if(error);
5707 }
5708
5709 if (sotype == SOCK_STREAM) {
5710 # define NFS_AUX_CONNECTION_TIMEOUT 4 /* 4 second timeout for connections */
5711 int count = 0;
5712
/* start a non-blocking connect, then wait for it in one-second (tv) steps */
5713 error = sock_connect(newso, saddr, MSG_DONTWAIT);
5714 if (error == EINPROGRESS) {
5715 error = 0;
5716 }
5717 nfsmout_if(error);
5718
5719 while ((error = sock_connectwait(newso, &tv)) == EINPROGRESS) {
5720 /* After NFS_AUX_CONNECTION_TIMEOUT bail */
5721 if (++count >= NFS_AUX_CONNECTION_TIMEOUT) {
5722 error = ETIMEDOUT;
5723 break;
5724 }
5725 }
5726 nfsmout_if(error);
5727 }
5728 if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
5729 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
5730 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) {
5731 goto nfsmout;
5732 }
5733 so = newso;
5734 } else {
5735 /* make sure socket is using a one second timeout in this function */
5736 optlen = sizeof(orig_rcvto);
5737 error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
5738 if (!error) {
5739 optlen = sizeof(orig_sndto);
5740 error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
5741 }
/* timeouts are overridden only if both originals were read; restoreto records that */
5742 if (!error) {
5743 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
5744 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
5745 restoreto = 1;
5746 }
5747 }
5748
5749 if (sotype == SOCK_STREAM) {
5750 sendat = 0; /* we only resend the request for UDP */
5751 nfs_rpc_record_state_init(&nrrs);
5752 }
5753
5754 for (try = 0; try < timeo; try++) {
/* the first attempt passes a NULL thread to nfs_sigintr(); later attempts pass thd */
5755 if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0))) {
5756 break;
5757 }
5758 if (!try || (try == sendat)) {
5759 /* send the request (resending periodically for UDP) */
/* a copy is sent each time so mreq stays available for resends */
5760 if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) {
5761 goto nfsmout;
5762 }
5763 bzero(&msg, sizeof(msg));
5764 if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
5765 msg.msg_name = saddr;
5766 msg.msg_namelen = saddr->sa_len;
5767 }
5768 if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen))) {
5769 goto nfsmout;
5770 }
/* next resend at double the attempt number, capped at 30 */
5771 sendat *= 2;
5772 if (sendat > 30) {
5773 sendat = 30;
5774 }
5775 }
5776 /* wait for the response */
5777 if (sotype == SOCK_STREAM) {
5778 /* try to read (more of) record */
5779 error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
5780 /* if we don't have the whole record yet, we'll keep trying */
5781 } else {
5782 readlen = 1 << 18;
5783 bzero(&msg, sizeof(msg));
5784 error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
5785 }
/* nothing received within the socket timeout: go around for the next attempt */
5786 if (error == EWOULDBLOCK) {
5787 continue;
5788 }
5789 nfsmout_if(error);
5790 /* parse the response */
5791 nfsm_chain_dissect_init(error, nmrep, mrep);
5792 nfsm_chain_get_32(error, nmrep, rxid);
5793 nfsm_chain_get_32(error, nmrep, reply);
5794 nfsmout_if(error);
5795 if ((rxid != xid) || (reply != RPC_REPLY)) {
5796 error = EBADRPC;
5797 }
5798 nfsm_chain_get_32(error, nmrep, reply_status);
5799 nfsmout_if(error);
5800 if (reply_status == RPC_MSGDENIED) {
5801 nfsm_chain_get_32(error, nmrep, rejected_status);
5802 nfsmout_if(error);
5803 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
5804 goto nfsmout;
5805 }
5806 nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
5807 nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
5808 nfsmout_if(error);
/* the verifier body is skipped, not checked */
5809 if (verf_len) {
5810 nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
5811 }
5812 nfsm_chain_get_32(error, nmrep, accepted_status);
5813 nfsmout_if(error);
/* translate the RPC accept status into an errno value */
5814 switch (accepted_status) {
5815 case RPC_SUCCESS:
5816 error = 0;
5817 break;
5818 case RPC_PROGUNAVAIL:
5819 error = EPROGUNAVAIL;
5820 break;
5821 case RPC_PROGMISMATCH:
5822 error = EPROGMISMATCH;
5823 break;
5824 case RPC_PROCUNAVAIL:
5825 error = EPROCUNAVAIL;
5826 break;
5827 case RPC_GARBAGE:
5828 error = EBADRPC;
5829 break;
5830 case RPC_SYSTEM_ERR:
5831 default:
5832 error = EIO;
5833 break;
5834 }
/* a reply was fully parsed: leave the retry loop */
5835 break;
5836 }
5837 nfsmout:
/* common exit: restore caller's socket timeouts, close our socket, free the request */
5838 if (restoreto) {
5839 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv));
5840 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv));
5841 }
5842 if (newso) {
5843 sock_shutdown(newso, SHUT_RDWR);
5844 sock_close(newso);
5845 }
5846 mbuf_freem(mreq);
5847 return error;
5848 }
5849
5850 int
5851 nfs_portmap_lookup(
5852 struct nfsmount *nmp,
5853 vfs_context_t ctx,
5854 struct sockaddr *sa,
5855 socket_t so,
5856 uint32_t protocol,
5857 uint32_t vers,
5858 uint32_t stype,
5859 int timeo)
5860 {
5861 thread_t thd = vfs_context_thread(ctx);
5862 kauth_cred_t cred = vfs_context_ucred(ctx);
5863 struct sockaddr_storage ss;
5864 struct sockaddr *saddr = (struct sockaddr*)&ss;
5865 static struct sockaddr_un rpcbind_cots = {
5866 sizeof(struct sockaddr_un),
5867 AF_LOCAL,
5868 RPCB_TICOTSORD_PATH
5869 };
5870 static struct sockaddr_un rpcbind_clts = {
5871 sizeof(struct sockaddr_un),
5872 AF_LOCAL,
5873 RPCB_TICLTS_PATH
5874 };
5875 struct nfsm_chain nmreq, nmrep;
5876 mbuf_t mreq;
5877 int error = 0, ip, pmprog, pmvers, pmproc;
5878 uint32_t ualen = 0;
5879 uint32_t port;
5880 uint64_t xid = 0;
5881 char uaddr[MAX_IPv6_STR_LEN + 16];
5882
5883 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5884 if (saddr->sa_family == AF_INET) {
5885 ip = 4;
5886 pmprog = PMAPPROG;
5887 pmvers = PMAPVERS;
5888 pmproc = PMAPPROC_GETPORT;
5889 } else if (saddr->sa_family == AF_INET6) {
5890 ip = 6;
5891 pmprog = RPCBPROG;
5892 pmvers = RPCBVERS4;
5893 pmproc = RPCBPROC_GETVERSADDR;
5894 } else if (saddr->sa_family == AF_LOCAL) {
5895 ip = 0;
5896 pmprog = RPCBPROG;
5897 pmvers = RPCBVERS4;
5898 pmproc = RPCBPROC_GETVERSADDR;
5899 NFS_SOCK_DBG("%s\n", ((struct sockaddr_un*)sa)->sun_path);
5900 saddr = (struct sockaddr*)((stype == SOCK_STREAM) ? &rpcbind_cots : &rpcbind_clts);
5901 } else {
5902 return EINVAL;
5903 }
5904 nfsm_chain_null(&nmreq);
5905 nfsm_chain_null(&nmrep);
5906
5907 tryagain:
5908 /* send portmapper request to get port/uaddr */
5909 if (ip == 4) {
5910 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT);
5911 } else if (ip == 6) {
5912 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT);
5913 }
5914 nfsm_chain_build_alloc_init(error, &nmreq, 8 * NFSX_UNSIGNED);
5915 nfsm_chain_add_32(error, &nmreq, protocol);
5916 nfsm_chain_add_32(error, &nmreq, vers);
5917 if (ip == 4) {
5918 nfsm_chain_add_32(error, &nmreq, stype == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP);
5919 nfsm_chain_add_32(error, &nmreq, 0);
5920 } else {
5921 if (stype == SOCK_STREAM) {
5922 if (ip == 6) {
5923 nfsm_chain_add_string(error, &nmreq, "tcp6", 4);
5924 } else {
5925 nfsm_chain_add_string(error, &nmreq, "ticotsord", 9);
5926 }
5927 } else {
5928 if (ip == 6) {
5929 nfsm_chain_add_string(error, &nmreq, "udp6", 4);
5930 } else {
5931 nfsm_chain_add_string(error, &nmreq, "ticlts", 6);
5932 }
5933 }
5934 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */
5935 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */
5936 }
5937 nfsm_chain_build_done(error, &nmreq);
5938 nfsmout_if(error);
5939 error = nfsm_rpchead2(nmp, stype, pmprog, pmvers, pmproc,
5940 RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
5941 nfsmout_if(error);
5942 nmreq.nmc_mhead = NULL;
5943
5944 NFS_SOCK_DUMP_MBUF("nfs_portmap_loockup request", mreq);
5945 error = nfs_aux_request(nmp, thd, saddr, so,
5946 stype, mreq, R_XID32(xid), 0, timeo, &nmrep);
5947 NFS_SOCK_DUMP_MBUF("nfs_portmap_lookup reply", nmrep.nmc_mhead);
5948 NFS_SOCK_DBG("rpcbind request returned %d for program %u vers %u: %s\n", error, protocol, vers,
5949 (saddr->sa_family == AF_LOCAL) ? ((struct sockaddr_un *)saddr)->sun_path :
5950 (saddr->sa_family == AF_INET6) ? "INET6 socket" : "INET socket");
5951
5952 /* grab port from portmap response */
5953 if (ip == 4) {
5954 nfsm_chain_get_32(error, &nmrep, port);
5955 if (!error) {
5956 ((struct sockaddr_in*)sa)->sin_port = htons(port);
5957 }
5958 } else {
5959 /* get uaddr string and convert to sockaddr */
5960 nfsm_chain_get_32(error, &nmrep, ualen);
5961 if (!error) {
5962 if (ualen > (sizeof(uaddr) - 1)) {
5963 error = EIO;
5964 }
5965 if (ualen < 1) {
5966 /* program is not available, just return a zero port */
5967 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5968 if (ip == 6) {
5969 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0);
5970 } else {
5971 ((struct sockaddr_un*)saddr)->sun_path[0] = '\0';
5972 }
5973 NFS_SOCK_DBG("Program %u version %u unavailable", protocol, vers);
5974 } else {
5975 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr);
5976 NFS_SOCK_DBG("Got uaddr %s\n", uaddr);
5977 if (!error) {
5978 uaddr[ualen] = '\0';
5979 if (!nfs_uaddr2sockaddr(uaddr, saddr)) {
5980 error = EIO;
5981 }
5982 }
5983 }
5984 }
5985 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) {
5986 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
5987 if (pmvers == RPCBVERS4) {
5988 /* fall back to v3 and GETADDR */
5989 pmvers = RPCBVERS3;
5990 pmproc = RPCBPROC_GETADDR;
5991 nfsm_chain_cleanup(&nmreq);
5992 nfsm_chain_cleanup(&nmrep);
5993 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5994 xid = 0;
5995 error = 0;
5996 goto tryagain;
5997 }
5998 }
5999 if (!error) {
6000 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len));
6001 }
6002 }
6003 nfsmout:
6004 nfsm_chain_cleanup(&nmreq);
6005 nfsm_chain_cleanup(&nmrep);
6006 NFS_SOCK_DBG("Returned %d\n", error);
6007
6008 return error;
6009 }
6010
6011 int
6012 nfs_msg(thread_t thd,
6013 const char *server,
6014 const char *msg,
6015 int error)
6016 {
6017 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
6018 tpr_t tpr;
6019
6020 if (p) {
6021 tpr = tprintf_open(p);
6022 } else {
6023 tpr = NULL;
6024 }
6025 if (error) {
6026 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
6027 } else {
6028 tprintf(tpr, "nfs server %s: %s\n", server, msg);
6029 }
6030 tprintf_close(tpr);
6031 return 0;
6032 }
6033
/*
 * Policy bits kept in nfs_squishy_flags.
 */
/* Squish mounts only on mobile machines */
#define NFS_SQUISH_MOBILE_ONLY          0x0001
/* Squish mounts only if they are automounted */
#define NFS_SQUISH_AUTOMOUNTED_ONLY     0x0002
/* Treat all soft mounts as though they were on a mobile machine */
#define NFS_SQUISH_SOFT                 0x0004
/* Try to squish mounts more quickly. */
#define NFS_SQUISH_QUICK                0x0008
/* Squish all mounts on shutdown. Currently not implemented */
#define NFS_SQUISH_SHUTDOWN             0x1000

/* Current squish policy. */
uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK;
/* Non-zero when the machine is considered mobile (tested by the squish checks). */
int32_t nfs_is_mobile;

/* Dead timeout for squishy mounts */
#define NFS_SQUISHY_DEADTIMEOUT         8
/* Quicker dead timeout used when the NFS_SQUISH_QUICK bit of nfs_squishy_flags is set */
#define NFS_SQUISHY_QUICKTIMEOUT        4
6045
6046 /*
6047 * Could this mount be squished?
6048 */
6049 int
6050 nfs_can_squish(struct nfsmount *nmp)
6051 {
6052 uint64_t flags = vfs_flags(nmp->nm_mountp);
6053 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT));
6054
6055 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0) {
6056 return 0;
6057 }
6058
6059 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0) {
6060 return 0;
6061 }
6062
6063 return 1;
6064 }
6065
6066 /*
6067 * NFS mounts default to "rw,hard" - but frequently on mobile clients
6068 * the mount may become "not responding". It's desirable to be able
6069 * to unmount these dead mounts, but only if there is no risk of
6070 * losing data or crashing applications. A "squishy" NFS mount is one
6071 * that can be force unmounted with little risk of harm.
6072 *
6073 * nfs_is_squishy checks if a mount is in a squishy state. A mount is
6074 * in a squishy state iff it is allowed to be squishy and there are no
6075 * dirty pages and there are no mmapped files and there are no files
6076 * open for write. Mounts are allowed to be squishy is controlled by
6077 * the settings of the nfs_squishy_flags and its mobility state. These
6078 * flags can be set by sysctls.
6079 *
6080 * If nfs_is_squishy determines that we are in a squishy state we will
6081 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT
6082 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see
6083 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just
6084 * update the current dead timeout with the mount's nm_deadtimeout
6085 * value set at mount time.
6086 *
6087 * Assumes that nm_lock is held.
6088 *
6089 * Note this routine is racey, but its effects on setting the
6090 * dead timeout only have effects when we're in trouble and are likely
6091 * to stay that way. Since by default its only for automounted
6092 * volumes on mobile machines; this is a reasonable trade off between
6093 * data integrity and user experience. It can be disabled or set via
6094 * nfs.conf file.
6095 */
6096
6097 int
6098 nfs_is_squishy(struct nfsmount *nmp)
6099 {
6100 mount_t mp = nmp->nm_mountp;
6101 int squishy = 0;
6102 int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;
6103
6104 NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
6105 vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile);
6106
6107 if (!nfs_can_squish(nmp)) {
6108 goto out;
6109 }
6110
6111 timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout / 8, timeo) : timeo;
6112 NFS_SOCK_DBG("nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo);
6113
6114 if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
6115 uint64_t flags = mp ? vfs_flags(mp) : 0;
6116 squishy = 1;
6117
6118 /*
6119 * Walk the nfs nodes and check for dirty buffers it we're not
6120 * RDONLY and we've not already been declared as squishy since
6121 * this can be a bit expensive.
6122 */
6123 if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY)) {
6124 squishy = !nfs_mount_is_dirty(mp);
6125 }
6126 }
6127
6128 out:
6129 if (squishy) {
6130 nmp->nm_state |= NFSSTA_SQUISHY;
6131 } else {
6132 nmp->nm_state &= ~NFSSTA_SQUISHY;
6133 }
6134
6135 nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;
6136
6137 NFS_SOCK_DBG("nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout);
6138
6139 return squishy;
6140 }
6141
6142 /*
6143 * On a send operation, if we can't reach the server and we've got only one server to talk to
6144 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead
6145 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise.
6146 */
6147 int
6148 nfs_is_dead(int error, struct nfsmount *nmp)
6149 {
6150 fsid_t fsid;
6151
6152 lck_mtx_lock(&nmp->nm_lock);
6153 if (nmp->nm_state & NFSSTA_DEAD) {
6154 lck_mtx_unlock(&nmp->nm_lock);
6155 return 1;
6156 }
6157
6158 if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) ||
6159 !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) {
6160 lck_mtx_unlock(&nmp->nm_lock);
6161 return 0;
6162 }
6163
6164 if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
6165 printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
6166 fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
6167 lck_mtx_unlock(&nmp->nm_lock);
6168 nfs_mount_zombie(nmp, NFSSTA_DEAD);
6169 vfs_event_signal(&fsid, VQ_DEAD, 0);
6170 return 1;
6171 }
6172 lck_mtx_unlock(&nmp->nm_lock);
6173 return 0;
6174 }
6175
6176 /*
6177 * If we've experienced timeouts and we're not really a
6178 * classic hard mount, then just return cached data to
6179 * the caller instead of likely hanging on an RPC.
6180 */
6181 int
6182 nfs_use_cache(struct nfsmount *nmp)
6183 {
6184 /*
6185 *%%% We always let mobile users goto the cache,
6186 * perhaps we should not even require them to have
6187 * a timeout?
6188 */
6189 int cache_ok = (nfs_is_mobile || NMFLAG(nmp, SOFT) ||
6190 nfs_can_squish(nmp) || nmp->nm_deadtimeout);
6191
6192 int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
6193
6194 /*
6195 * So if we have a timeout and we're not really a hard hard-mount,
6196 * return 1 to not get things out of the cache.
6197 */
6198
6199 return (nmp->nm_state & timeoutmask) && cache_ok;
6200 }
6201
6202 /*
6203 * Log a message that nfs or lockd server is unresponsive. Check if we
6204 * can be squished and if we can, or that our dead timeout has
6205 * expired, and we're not holding state, set our mount as dead, remove
6206 * our mount state and ask to be unmounted. If we are holding state
6207 * we're being called from the nfs_request_timer and will soon detect
6208 * that we need to unmount.
6209 */
6210 void
6211 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg, int holding_state)
6212 {
6213 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
6214 uint32_t do_vfs_signal = 0;
6215 struct timeval now;
6216
6217 if (nfs_mount_gone(nmp)) {
6218 return;
6219 }
6220
6221 lck_mtx_lock(&nmp->nm_lock);
6222
6223 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
6224 if (NMFLAG(nmp, MUTEJUKEBOX)) { /* jukebox timeouts don't count as unresponsive if muted */
6225 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
6226 }
6227 wasunresponsive = (nmp->nm_state & timeoutmask);
6228
6229 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
6230 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
6231
6232 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
6233 nmp->nm_state |= NFSSTA_TIMEO;
6234 }
6235 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
6236 nmp->nm_state |= NFSSTA_LOCKTIMEO;
6237 }
6238 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) {
6239 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
6240 }
6241
6242 unresponsive = (nmp->nm_state & timeoutmask);
6243
6244 nfs_is_squishy(nmp);
6245
6246 if (unresponsive && (nmp->nm_curdeadtimeout > 0)) {
6247 microuptime(&now);
6248 if (!wasunresponsive) {
6249 nmp->nm_deadto_start = now.tv_sec;
6250 nfs_mount_sock_thread_wake(nmp);
6251 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout && !holding_state) {
6252 if (!(nmp->nm_state & NFSSTA_DEAD)) {
6253 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
6254 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
6255 }
6256 do_vfs_signal = VQ_DEAD;
6257 }
6258 }
6259 lck_mtx_unlock(&nmp->nm_lock);
6260
6261 if (do_vfs_signal == VQ_DEAD && !(nmp->nm_state & NFSSTA_DEAD)) {
6262 nfs_mount_zombie(nmp, NFSSTA_DEAD);
6263 } else if (softnobrowse || wasunresponsive || !unresponsive) {
6264 do_vfs_signal = 0;
6265 } else {
6266 do_vfs_signal = VQ_NOTRESP;
6267 }
6268 if (do_vfs_signal) {
6269 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);
6270 }
6271
6272 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
6273 }
6274
6275 void
6276 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
6277 {
6278 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
6279 int do_vfs_signal;
6280
6281 if (nfs_mount_gone(nmp)) {
6282 return;
6283 }
6284
6285 if (msg) {
6286 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
6287 }
6288
6289 lck_mtx_lock(&nmp->nm_lock);
6290
6291 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
6292 if (NMFLAG(nmp, MUTEJUKEBOX)) { /* jukebox timeouts don't count as unresponsive if muted */
6293 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
6294 }
6295 wasunresponsive = (nmp->nm_state & timeoutmask);
6296
6297 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
6298 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
6299
6300 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
6301 nmp->nm_state &= ~NFSSTA_TIMEO;
6302 }
6303 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
6304 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
6305 }
6306 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) {
6307 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
6308 }
6309
6310 unresponsive = (nmp->nm_state & timeoutmask);
6311
6312 nmp->nm_deadto_start = 0;
6313 nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
6314 nmp->nm_state &= ~NFSSTA_SQUISHY;
6315 lck_mtx_unlock(&nmp->nm_lock);
6316
6317 if (softnobrowse) {
6318 do_vfs_signal = 0;
6319 } else {
6320 do_vfs_signal = (wasunresponsive && !unresponsive);
6321 }
6322 if (do_vfs_signal) {
6323 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
6324 }
6325 }
6326
6327
6328 #endif /* CONFIG_NFS_CLIENT */
6329
6330 #if CONFIG_NFS_SERVER
6331
6332 /*
6333 * Generate the rpc reply header
6334 * siz arg. is used to decide if adding a cluster is worthwhile
6335 */
6336 int
6337 nfsrv_rephead(
6338 struct nfsrv_descript *nd,
6339 __unused struct nfsrv_sock *slp,
6340 struct nfsm_chain *nmrepp,
6341 size_t siz)
6342 {
6343 mbuf_t mrep;
6344 u_int32_t *tl;
6345 struct nfsm_chain nmrep;
6346 int err, error;
6347
6348 err = nd->nd_repstat;
6349 if (err && (nd->nd_vers == NFS_VER2)) {
6350 siz = 0;
6351 }
6352
6353 /*
6354 * If this is a big reply, use a cluster else
6355 * try and leave leading space for the lower level headers.
6356 */
6357 siz += RPC_REPLYSIZ;
6358 if (siz >= nfs_mbuf_minclsize) {
6359 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
6360 } else {
6361 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
6362 }
6363 if (error) {
6364 /* unable to allocate packet */
6365 /* XXX should we keep statistics for these errors? */
6366 return error;
6367 }
6368 if (siz < nfs_mbuf_minclsize) {
6369 /* leave space for lower level headers */
6370 tl = mbuf_data(mrep);
6371 tl += 80 / sizeof(*tl); /* XXX max_hdr? XXX */
6372 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
6373 }
6374 nfsm_chain_init(&nmrep, mrep);
6375 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
6376 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
6377 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
6378 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
6379 if (err & NFSERR_AUTHERR) {
6380 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
6381 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
6382 } else {
6383 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
6384 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
6385 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
6386 }
6387 } else {
6388 /* reply status */
6389 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
6390 if (nd->nd_gss_context != NULL) {
6391 /* RPCSEC_GSS verifier */
6392 error = nfs_gss_svc_verf_put(nd, &nmrep);
6393 if (error) {
6394 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
6395 goto done;
6396 }
6397 } else {
6398 /* RPCAUTH_NULL verifier */
6399 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
6400 nfsm_chain_add_32(error, &nmrep, 0);
6401 }
6402 /* accepted status */
6403 switch (err) {
6404 case EPROGUNAVAIL:
6405 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
6406 break;
6407 case EPROGMISMATCH:
6408 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
6409 /* XXX hard coded versions? */
6410 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
6411 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
6412 break;
6413 case EPROCUNAVAIL:
6414 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
6415 break;
6416 case EBADRPC:
6417 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
6418 break;
6419 default:
6420 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
6421 if (nd->nd_gss_context != NULL) {
6422 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
6423 }
6424 if (err != NFSERR_RETVOID) {
6425 nfsm_chain_add_32(error, &nmrep,
6426 (err ? nfsrv_errmap(nd, err) : 0));
6427 }
6428 break;
6429 }
6430 }
6431
6432 done:
6433 nfsm_chain_build_done(error, &nmrep);
6434 if (error) {
6435 /* error composing reply header */
6436 /* XXX should we keep statistics for these errors? */
6437 mbuf_freem(mrep);
6438 return error;
6439 }
6440
6441 *nmrepp = nmrep;
6442 if ((err != 0) && (err != NFSERR_RETVOID)) {
6443 OSAddAtomic64(1, &nfsstats.srvrpc_errs);
6444 }
6445 return 0;
6446 }
6447
6448 /*
6449 * The nfs server send routine.
6450 *
6451 * - return EINTR or ERESTART if interrupted by a signal
6452 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
6453 * - do any cleanup required by recoverable socket errors (???)
6454 */
6455 int
6456 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
6457 {
6458 int error;
6459 socket_t so = slp->ns_so;
6460 struct sockaddr *sendnam;
6461 struct msghdr msg;
6462
6463 bzero(&msg, sizeof(msg));
6464 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
6465 if ((sendnam = mbuf_data(nam))) {
6466 msg.msg_name = (caddr_t)sendnam;
6467 msg.msg_namelen = sendnam->sa_len;
6468 }
6469 }
6470 if (NFS_IS_DBG(NFS_FAC_SRV, 15)) {
6471 nfs_dump_mbuf(__func__, __LINE__, "nfsrv_send\n", top);
6472 }
6473 error = sock_sendmbuf(so, &msg, top, 0, NULL);
6474 if (!error) {
6475 return 0;
6476 }
6477 log(LOG_INFO, "nfsd send error %d\n", error);
6478
6479 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) {
6480 error = EPIPE; /* zap TCP sockets if they time out on send */
6481 }
6482 /* Handle any recoverable (soft) socket errors here. (???) */
6483 if (error != EINTR && error != ERESTART && error != EIO &&
6484 error != EWOULDBLOCK && error != EPIPE) {
6485 error = 0;
6486 }
6487
6488 return error;
6489 }
6490
6491 /*
6492 * Socket upcall routine for the nfsd sockets.
6493 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
6494 * Essentially do as much as possible non-blocking, else punt and it will
6495 * be called with MBUF_WAITOK from an nfsd.
6496 */
6497 void
6498 nfsrv_rcv(socket_t so, void *arg, int waitflag)
6499 {
6500 struct nfsrv_sock *slp = arg;
6501
6502 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) {
6503 return;
6504 }
6505
6506 lck_rw_lock_exclusive(&slp->ns_rwlock);
6507 nfsrv_rcv_locked(so, slp, waitflag);
6508 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
6509 }
6510 void
6511 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
6512 {
6513 mbuf_t m, mp, mhck, m2;
6514 int ns_flag = 0, error;
6515 struct msghdr msg;
6516 size_t bytes_read;
6517
6518 if ((slp->ns_flag & SLP_VALID) == 0) {
6519 if (waitflag == MBUF_DONTWAIT) {
6520 lck_rw_done(&slp->ns_rwlock);
6521 }
6522 return;
6523 }
6524
6525 #ifdef notdef
6526 /*
6527 * Define this to test for nfsds handling this under heavy load.
6528 */
6529 if (waitflag == MBUF_DONTWAIT) {
6530 ns_flag = SLP_NEEDQ;
6531 goto dorecs;
6532 }
6533 #endif
6534 if (slp->ns_sotype == SOCK_STREAM) {
6535 /*
6536 * If there are already records on the queue, defer soreceive()
6537 * to an(other) nfsd so that there is feedback to the TCP layer that
6538 * the nfs servers are heavily loaded.
6539 */
6540 if (slp->ns_rec) {
6541 ns_flag = SLP_NEEDQ;
6542 goto dorecs;
6543 }
6544
6545 /*
6546 * Do soreceive().
6547 */
6548 bytes_read = 1000000000;
6549 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
6550 if (error || mp == NULL) {
6551 if (error == EWOULDBLOCK) {
6552 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
6553 } else {
6554 ns_flag = SLP_DISCONN;
6555 }
6556 goto dorecs;
6557 }
6558 m = mp;
6559 if (slp->ns_rawend) {
6560 if ((error = mbuf_setnext(slp->ns_rawend, m))) {
6561 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
6562 }
6563 slp->ns_cc += bytes_read;
6564 } else {
6565 slp->ns_raw = m;
6566 slp->ns_cc = bytes_read;
6567 }
6568 while ((m2 = mbuf_next(m))) {
6569 m = m2;
6570 }
6571 slp->ns_rawend = m;
6572
6573 /*
6574 * Now try and parse record(s) out of the raw stream data.
6575 */
6576 error = nfsrv_getstream(slp, waitflag);
6577 if (error) {
6578 if (error == EPERM) {
6579 ns_flag = SLP_DISCONN;
6580 } else {
6581 ns_flag = SLP_NEEDQ;
6582 }
6583 }
6584 } else {
6585 struct sockaddr_storage nam;
6586
6587 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
6588 /* already have max # RPC records queued on this socket */
6589 ns_flag = SLP_NEEDQ;
6590 goto dorecs;
6591 }
6592
6593 bzero(&msg, sizeof(msg));
6594 msg.msg_name = (caddr_t)&nam;
6595 msg.msg_namelen = sizeof(nam);
6596
6597 do {
6598 bytes_read = 1000000000;
6599 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
6600 if (mp) {
6601 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
6602 mbuf_setlen(mhck, nam.ss_len);
6603 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
6604 m = mhck;
6605 if (mbuf_setnext(m, mp)) {
6606 /* trouble... just drop it */
6607 printf("nfsrv_rcv: mbuf_setnext failed\n");
6608 mbuf_free(mhck);
6609 m = mp;
6610 }
6611 } else {
6612 m = mp;
6613 }
6614 if (slp->ns_recend) {
6615 mbuf_setnextpkt(slp->ns_recend, m);
6616 } else {
6617 slp->ns_rec = m;
6618 slp->ns_flag |= SLP_DOREC;
6619 }
6620 slp->ns_recend = m;
6621 mbuf_setnextpkt(m, NULL);
6622 slp->ns_reccnt++;
6623 }
6624 } while (mp);
6625 }
6626
6627 /*
6628 * Now try and process the request records, non-blocking.
6629 */
6630 dorecs:
6631 if (ns_flag) {
6632 slp->ns_flag |= ns_flag;
6633 }
6634 if (waitflag == MBUF_DONTWAIT) {
6635 int wake = (slp->ns_flag & SLP_WORKTODO);
6636 lck_rw_done(&slp->ns_rwlock);
6637 if (wake && nfsd_thread_count) {
6638 lck_mtx_lock(nfsd_mutex);
6639 nfsrv_wakenfsd(slp);
6640 lck_mtx_unlock(nfsd_mutex);
6641 }
6642 }
6643 }
6644
6645 /*
6646 * Try and extract an RPC request from the mbuf data list received on a
6647 * stream socket. The "waitflag" argument indicates whether or not it
6648 * can sleep.
6649 */
6650 int
6651 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
6652 {
6653 mbuf_t m;
6654 char *cp1, *cp2, *mdata;
6655 int len, mlen, error;
6656 mbuf_t om, m2, recm;
6657 u_int32_t recmark;
6658
6659 if (slp->ns_flag & SLP_GETSTREAM) {
6660 panic("nfs getstream");
6661 }
6662 slp->ns_flag |= SLP_GETSTREAM;
6663 for (;;) {
6664 if (slp->ns_reclen == 0) {
6665 if (slp->ns_cc < NFSX_UNSIGNED) {
6666 slp->ns_flag &= ~SLP_GETSTREAM;
6667 return 0;
6668 }
6669 m = slp->ns_raw;
6670 mdata = mbuf_data(m);
6671 mlen = mbuf_len(m);
6672 if (mlen >= NFSX_UNSIGNED) {
6673 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
6674 mdata += NFSX_UNSIGNED;
6675 mlen -= NFSX_UNSIGNED;
6676 mbuf_setdata(m, mdata, mlen);
6677 } else {
6678 cp1 = (caddr_t)&recmark;
6679 cp2 = mdata;
6680 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
6681 while (mlen == 0) {
6682 m = mbuf_next(m);
6683 cp2 = mbuf_data(m);
6684 mlen = mbuf_len(m);
6685 }
6686 *cp1++ = *cp2++;
6687 mlen--;
6688 mbuf_setdata(m, cp2, mlen);
6689 }
6690 }
6691 slp->ns_cc -= NFSX_UNSIGNED;
6692 recmark = ntohl(recmark);
6693 slp->ns_reclen = recmark & ~0x80000000;
6694 if (recmark & 0x80000000) {
6695 slp->ns_flag |= SLP_LASTFRAG;
6696 } else {
6697 slp->ns_flag &= ~SLP_LASTFRAG;
6698 }
6699 if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
6700 slp->ns_flag &= ~SLP_GETSTREAM;
6701 return EPERM;
6702 }
6703 }
6704
6705 /*
6706 * Now get the record part.
6707 *
6708 * Note that slp->ns_reclen may be 0. Linux sometimes
6709 * generates 0-length RPCs
6710 */
6711 recm = NULL;
6712 if (slp->ns_cc == slp->ns_reclen) {
6713 recm = slp->ns_raw;
6714 slp->ns_raw = slp->ns_rawend = NULL;
6715 slp->ns_cc = slp->ns_reclen = 0;
6716 } else if (slp->ns_cc > slp->ns_reclen) {
6717 len = 0;
6718 m = slp->ns_raw;
6719 mlen = mbuf_len(m);
6720 mdata = mbuf_data(m);
6721 om = NULL;
6722 while (len < slp->ns_reclen) {
6723 if ((len + mlen) > slp->ns_reclen) {
6724 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
6725 slp->ns_flag &= ~SLP_GETSTREAM;
6726 return EWOULDBLOCK;
6727 }
6728 if (om) {
6729 if (mbuf_setnext(om, m2)) {
6730 /* trouble... just drop it */
6731 printf("nfsrv_getstream: mbuf_setnext failed\n");
6732 mbuf_freem(m2);
6733 slp->ns_flag &= ~SLP_GETSTREAM;
6734 return EWOULDBLOCK;
6735 }
6736 recm = slp->ns_raw;
6737 } else {
6738 recm = m2;
6739 }
6740 mdata += slp->ns_reclen - len;
6741 mlen -= slp->ns_reclen - len;
6742 mbuf_setdata(m, mdata, mlen);
6743 len = slp->ns_reclen;
6744 } else if ((len + mlen) == slp->ns_reclen) {
6745 om = m;
6746 len += mlen;
6747 m = mbuf_next(m);
6748 recm = slp->ns_raw;
6749 if (mbuf_setnext(om, NULL)) {
6750 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
6751 slp->ns_flag &= ~SLP_GETSTREAM;
6752 return EWOULDBLOCK;
6753 }
6754 mlen = mbuf_len(m);
6755 mdata = mbuf_data(m);
6756 } else {
6757 om = m;
6758 len += mlen;
6759 m = mbuf_next(m);
6760 mlen = mbuf_len(m);
6761 mdata = mbuf_data(m);
6762 }
6763 }
6764 slp->ns_raw = m;
6765 slp->ns_cc -= len;
6766 slp->ns_reclen = 0;
6767 } else {
6768 slp->ns_flag &= ~SLP_GETSTREAM;
6769 return 0;
6770 }
6771
6772 /*
6773 * Accumulate the fragments into a record.
6774 */
6775 if (slp->ns_frag == NULL) {
6776 slp->ns_frag = recm;
6777 } else {
6778 m = slp->ns_frag;
6779 while ((m2 = mbuf_next(m))) {
6780 m = m2;
6781 }
6782 if ((error = mbuf_setnext(m, recm))) {
6783 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
6784 }
6785 }
6786 if (slp->ns_flag & SLP_LASTFRAG) {
6787 if (slp->ns_recend) {
6788 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
6789 } else {
6790 slp->ns_rec = slp->ns_frag;
6791 slp->ns_flag |= SLP_DOREC;
6792 }
6793 slp->ns_recend = slp->ns_frag;
6794 slp->ns_frag = NULL;
6795 }
6796 }
6797 }
6798
6799 /*
6800 * Parse an RPC header.
6801 */
6802 int
6803 nfsrv_dorec(
6804 struct nfsrv_sock *slp,
6805 struct nfsd *nfsd,
6806 struct nfsrv_descript **ndp)
6807 {
6808 mbuf_t m;
6809 mbuf_t nam;
6810 struct nfsrv_descript *nd;
6811 int error = 0;
6812
6813 *ndp = NULL;
6814 if (!(slp->ns_flag & (SLP_VALID | SLP_DOREC)) || (slp->ns_rec == NULL)) {
6815 return ENOBUFS;
6816 }
6817 MALLOC_ZONE(nd, struct nfsrv_descript *,
6818 sizeof(struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
6819 if (!nd) {
6820 return ENOMEM;
6821 }
6822 m = slp->ns_rec;
6823 slp->ns_rec = mbuf_nextpkt(m);
6824 if (slp->ns_rec) {
6825 mbuf_setnextpkt(m, NULL);
6826 } else {
6827 slp->ns_flag &= ~SLP_DOREC;
6828 slp->ns_recend = NULL;
6829 }
6830 slp->ns_reccnt--;
6831 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
6832 nam = m;
6833 m = mbuf_next(m);
6834 if ((error = mbuf_setnext(nam, NULL))) {
6835 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
6836 }
6837 } else {
6838 nam = NULL;
6839 }
6840 nd->nd_nam2 = nam;
6841 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
6842 if (!error) {
6843 error = nfsrv_getreq(nd);
6844 }
6845 if (error) {
6846 if (nam) {
6847 mbuf_freem(nam);
6848 }
6849 if (nd->nd_gss_context) {
6850 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
6851 }
6852 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
6853 return error;
6854 }
6855 nd->nd_mrep = NULL;
6856 *ndp = nd;
6857 nfsd->nfsd_nd = nd;
6858 return 0;
6859 }
6860
6861 /*
6862 * Parse an RPC request
6863 * - verify it
6864 * - fill in the cred struct.
6865 */
6866 int
6867 nfsrv_getreq(struct nfsrv_descript *nd)
6868 {
6869 struct nfsm_chain *nmreq;
6870 int len, i;
6871 u_int32_t nfsvers, auth_type;
6872 int error = 0;
6873 uid_t user_id;
6874 gid_t group_id;
6875 int ngroups;
6876 uint32_t val;
6877
6878 nd->nd_cr = NULL;
6879 nd->nd_gss_context = NULL;
6880 nd->nd_gss_seqnum = 0;
6881 nd->nd_gss_mb = NULL;
6882
6883 user_id = group_id = -2;
6884 val = auth_type = len = 0;
6885
6886 nmreq = &nd->nd_nmreq;
6887 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
6888 nfsm_chain_get_32(error, nmreq, val); // RPC Call
6889 if (!error && (val != RPC_CALL)) {
6890 error = EBADRPC;
6891 }
6892 nfsmout_if(error);
6893 nd->nd_repstat = 0;
6894 nfsm_chain_get_32(error, nmreq, val); // RPC Version
6895 nfsmout_if(error);
6896 if (val != RPC_VER2) {
6897 nd->nd_repstat = ERPCMISMATCH;
6898 nd->nd_procnum = NFSPROC_NOOP;
6899 return 0;
6900 }
6901 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
6902 nfsmout_if(error);
6903 if (val != NFS_PROG) {
6904 nd->nd_repstat = EPROGUNAVAIL;
6905 nd->nd_procnum = NFSPROC_NOOP;
6906 return 0;
6907 }
6908 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
6909 nfsmout_if(error);
6910 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
6911 nd->nd_repstat = EPROGMISMATCH;
6912 nd->nd_procnum = NFSPROC_NOOP;
6913 return 0;
6914 }
6915 nd->nd_vers = nfsvers;
6916 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
6917 nfsmout_if(error);
6918 if ((nd->nd_procnum >= NFS_NPROCS) ||
6919 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
6920 nd->nd_repstat = EPROCUNAVAIL;
6921 nd->nd_procnum = NFSPROC_NOOP;
6922 return 0;
6923 }
6924 if (nfsvers != NFS_VER3) {
6925 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
6926 }
6927 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
6928 nfsm_chain_get_32(error, nmreq, len); // Auth Length
6929 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ)) {
6930 error = EBADRPC;
6931 }
6932 nfsmout_if(error);
6933
6934 /* Handle authentication */
6935 if (auth_type == RPCAUTH_SYS) {
6936 struct posix_cred temp_pcred;
6937 if (nd->nd_procnum == NFSPROC_NULL) {
6938 return 0;
6939 }
6940 nd->nd_sec = RPCAUTH_SYS;
6941 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
6942 nfsm_chain_get_32(error, nmreq, len); // hostname length
6943 if (len < 0 || len > NFS_MAXNAMLEN) {
6944 error = EBADRPC;
6945 }
6946 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
6947 nfsmout_if(error);
6948
6949 /* create a temporary credential using the bits from the wire */
6950 bzero(&temp_pcred, sizeof(temp_pcred));
6951 nfsm_chain_get_32(error, nmreq, user_id);
6952 nfsm_chain_get_32(error, nmreq, group_id);
6953 temp_pcred.cr_groups[0] = group_id;
6954 nfsm_chain_get_32(error, nmreq, len); // extra GID count
6955 if ((len < 0) || (len > RPCAUTH_UNIXGIDS)) {
6956 error = EBADRPC;
6957 }
6958 nfsmout_if(error);
6959 for (i = 1; i <= len; i++) {
6960 if (i < NGROUPS) {
6961 nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
6962 } else {
6963 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
6964 }
6965 }
6966 nfsmout_if(error);
6967 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
6968 if (ngroups > 1) {
6969 nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
6970 }
6971 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
6972 nfsm_chain_get_32(error, nmreq, len); // verifier length
6973 if (len < 0 || len > RPCAUTH_MAXSIZ) {
6974 error = EBADRPC;
6975 }
6976 if (len > 0) {
6977 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
6978 }
6979
6980 /* request creation of a real credential */
6981 temp_pcred.cr_uid = user_id;
6982 temp_pcred.cr_ngroups = ngroups;
6983 nd->nd_cr = posix_cred_create(&temp_pcred);
6984 if (nd->nd_cr == NULL) {
6985 nd->nd_repstat = ENOMEM;
6986 nd->nd_procnum = NFSPROC_NOOP;
6987 return 0;
6988 }
6989 } else if (auth_type == RPCSEC_GSS) {
6990 error = nfs_gss_svc_cred_get(nd, nmreq);
6991 if (error) {
6992 if (error == EINVAL) {
6993 goto nfsmout; // drop the request
6994 }
6995 nd->nd_repstat = error;
6996 nd->nd_procnum = NFSPROC_NOOP;
6997 return 0;
6998 }
6999 } else {
7000 if (nd->nd_procnum == NFSPROC_NULL) { // assume it's AUTH_NONE
7001 return 0;
7002 }
7003 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
7004 nd->nd_procnum = NFSPROC_NOOP;
7005 return 0;
7006 }
7007 return 0;
7008 nfsmout:
7009 if (IS_VALID_CRED(nd->nd_cr)) {
7010 kauth_cred_unref(&nd->nd_cr);
7011 }
7012 nfsm_chain_cleanup(nmreq);
7013 return error;
7014 }
7015
7016 /*
7017 * Search for a sleeping nfsd and wake it up.
7018 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
7019 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
7020 * Note: Must be called with nfsd_mutex held.
7021 */
7022 void
7023 nfsrv_wakenfsd(struct nfsrv_sock *slp)
7024 {
7025 struct nfsd *nd;
7026
7027 if ((slp->ns_flag & SLP_VALID) == 0) {
7028 return;
7029 }
7030
7031 lck_rw_lock_exclusive(&slp->ns_rwlock);
7032 /* if there's work to do on this socket, make sure it's queued up */
7033 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
7034 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
7035 slp->ns_flag |= SLP_WAITQ;
7036 }
7037 lck_rw_done(&slp->ns_rwlock);
7038
7039 /* wake up a waiting nfsd, if possible */
7040 nd = TAILQ_FIRST(&nfsd_queue);
7041 if (!nd) {
7042 return;
7043 }
7044
7045 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
7046 nd->nfsd_flag &= ~NFSD_WAITING;
7047 wakeup(nd);
7048 }
7049
7050 #endif /* CONFIG_NFS_SERVER */
7051
7052 #endif /* CONFIG_NFS */