]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_socket.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_socket.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1991, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66 */
67
68 /*
69 * Socket operations for use by nfs
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75 #include <sys/signalvar.h>
76 #include <sys/kauth.h>
77 #include <sys/mount_internal.h>
78 #include <sys/kernel.h>
79 #include <sys/kpi_mbuf.h>
80 #include <sys/malloc.h>
81 #include <sys/vnode.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/syslog.h>
86 #include <sys/tprintf.h>
87 #include <libkern/OSAtomic.h>
88
89 #include <sys/time.h>
90 #include <kern/clock.h>
91 #include <kern/task.h>
92 #include <kern/thread.h>
93 #include <kern/thread_call.h>
94 #include <sys/user.h>
95 #include <sys/acct.h>
96
97 #include <netinet/in.h>
98 #include <netinet/tcp.h>
99
100 #include <nfs/rpcv2.h>
101 #include <nfs/krpc.h>
102 #include <nfs/nfsproto.h>
103 #include <nfs/nfs.h>
104 #include <nfs/xdr_subs.h>
105 #include <nfs/nfsm_subs.h>
106 #include <nfs/nfs_gss.h>
107 #include <nfs/nfsmount.h>
108 #include <nfs/nfsnode.h>
109
110 #define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)
111
112 /* XXX */
113 boolean_t current_thread_aborted(void);
114 kern_return_t thread_terminate(thread_t);
115
116
117 #if NFSSERVER
118 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
119
120 int nfsrv_getstream(struct nfsrv_sock *,int);
121 int nfsrv_getreq(struct nfsrv_descript *);
122 extern int nfsv3_procid[NFS_NPROCS];
123 #endif /* NFSSERVER */
124
125 /*
126 * compare two sockaddr structures
127 */
128 int
129 nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
130 {
131 if (!sa1)
132 return (-1);
133 if (!sa2)
134 return (1);
135 if (sa1->sa_family != sa2->sa_family)
136 return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
137 if (sa1->sa_len != sa2->sa_len)
138 return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
139 if (sa1->sa_family == AF_INET)
140 return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
141 &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
142 if (sa1->sa_family == AF_INET6)
143 return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
144 &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
145 return (-1);
146 }
147
148 #if NFSCLIENT
149
150 int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
151 int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
152 int nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *);
153 void nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *);
154 void nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
155 int nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
156 int nfs_reconnect(struct nfsmount *);
157 int nfs_connect_setup(struct nfsmount *);
158 void nfs_mount_sock_thread(void *, wait_result_t);
159 void nfs_udp_rcv(socket_t, void*, int);
160 void nfs_tcp_rcv(socket_t, void*, int);
161 void nfs_sock_poke(struct nfsmount *);
162 void nfs_request_match_reply(struct nfsmount *, mbuf_t);
163 void nfs_reqdequeue(struct nfsreq *);
164 void nfs_reqbusy(struct nfsreq *);
165 struct nfsreq *nfs_reqnext(struct nfsreq *);
166 int nfs_wait_reply(struct nfsreq *);
167 void nfs_softterm(struct nfsreq *);
168 int nfs_can_squish(struct nfsmount *);
169 int nfs_is_squishy(struct nfsmount *);
170 int nfs_is_dead(int, struct nfsmount *);
171
/*
 * Estimate the retransmit timeout (rto) for an NFS RPC sent via an
 * unreliable datagram.
 *
 * For the frequent RPCs the estimate is built from the smoothed mean
 * (nm_srtt) and mean deviation (nm_sdrtt) of the round-trip time kept for
 * that RPC's timer class; all other RPCs use the mount's default nm_timeo.
 * The justification for treating "other" this way is that these RPCs happen
 * so infrequently that a timer estimate would probably be stale.  Also,
 * since many of these RPCs are non-idempotent, a conservative timeout is
 * desired.
 *	getattr, lookup - A+2D
 *	read, write     - A+4D
 *	other           - nm_timeo
 *
 * NFS_RTO(n, t):  n is a pointer to the mount, t is the timer class
 *                 (0 = default, 1..4 index nm_srtt[]/nm_sdrtt[] as t - 1).
 * NFS_SRTT(r), NFS_SDRTT(r):  lvalues for the srtt/sdrtt slot that belongs
 *                 to request r's procedure.  They index with proct[...] - 1
 *                 and so must only be used for procedures whose proct[]
 *                 entry is non-zero.
 *
 * Every macro argument is fully parenthesized so that expression arguments
 * (e.g. a conditional expression for t) index the intended slot.  Note that
 * the arguments are still evaluated more than once: do not pass expressions
 * with side effects.
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
#define NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
191
192 /*
193 * Defines which timer to use for the procnum.
194 * 0 - default
195 * 1 - getattr
196 * 2 - lookup
197 * 3 - read
198 * 4 - write
199 */
200 static int proct[NFS_NPROCS] = {
201 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
202 };
203
/*
 * A congestion window (cwnd) for outstanding RPCs is maintained per mount
 * point.  Its size is adjusted in roughly the way described for TCP in:
 *	Van Jacobson, "Congestion Avoidance and Control", in Proceedings of
 *	SIGCOMM '88, ACM, August 1988.
 * The window is halved on a retransmit timeout and grows by 1/cwnd for each
 * RPC reply received while a full window of RPCs is in progress.
 * (The sent count and cwnd are scaled by NFS_CWNDSCALE so the arithmetic can
 * stay in integers.)
 * Variants of "slow start" were tried and found to be too much of a
 * performance hit (average rtt 3 times larger), presumably because of the
 * large rtt that NFS RPCs have.
 */
#define NFS_CWNDSCALE	256
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)

/* Power-of-two multipliers; NOTE(review): presumably retransmit backoff factors - confirm at the use site. */
static int nfs_backoff[8] = {
	2, 4, 8, 16,
	32, 64, 128, 256,
};
220
221 /*
222 * Increment location index to next address/server/location.
223 */
224 void
225 nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
226 {
227 uint8_t loc = nlip->nli_loc;
228 uint8_t serv = nlip->nli_serv;
229 uint8_t addr = nlip->nli_addr;
230
231 /* move to next address */
232 addr++;
233 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
234 /* no more addresses on current server, go to first address of next server */
235 next_server:
236 addr = 0;
237 serv++;
238 if (serv >= nlp->nl_locations[loc]->nl_servcount) {
239 /* no more servers on current location, go to first server of next location */
240 serv = 0;
241 loc++;
242 if (loc >= nlp->nl_numlocs)
243 loc = 0; /* after last location, wrap back around to first location */
244 }
245 }
246 /*
247 * It's possible for this next server to not have any addresses.
248 * Check for that here and go to the next server.
249 * But bail out if we've managed to come back around to the original
250 * location that was passed in. (That would mean no servers had any
251 * addresses. And we don't want to spin here forever.)
252 */
253 if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
254 return;
255 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
256 goto next_server;
257
258 nlip->nli_loc = loc;
259 nlip->nli_serv = serv;
260 nlip->nli_addr = addr;
261 }
262
263 /*
264 * Compare two location indices.
265 */
266 int
267 nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
268 {
269 if (nlip1->nli_loc != nlip2->nli_loc)
270 return (nlip1->nli_loc - nlip2->nli_loc);
271 if (nlip1->nli_serv != nlip2->nli_serv)
272 return (nlip1->nli_serv - nlip2->nli_serv);
273 return (nlip1->nli_addr - nlip2->nli_addr);
274 }
275
276 /*
277 * Get the mntfromname (or path portion only) for a given location.
278 */
279 void
280 nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
281 {
282 struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
283 char *p;
284 int cnt, i;
285
286 p = s;
287 if (!pathonly) {
288 cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name);
289 p += cnt;
290 size -= cnt;
291 }
292 if (fsl->nl_path.np_compcount == 0) {
293 /* mounting root export on server */
294 if (size > 0) {
295 *p++ = '/';
296 *p++ = '\0';
297 }
298 return;
299 }
300 /* append each server path component */
301 for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
302 cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
303 p += cnt;
304 size -= cnt;
305 }
306 }
307
308 /*
309 * NFS client connect socket upcall.
310 * (Used only during socket connect/search.)
311 */
/*
 * nfs_connect_upcall: socket upcall used while a socket is part of a
 * connect/search (it is installed with sock_setupcall() by
 * nfs_connect_search_new_socket()).
 *
 * so       - the socket with activity
 * arg      - the struct nfs_socket wrapping "so"
 * waitflag - unused
 *
 * While NSO_CONNECTING is set the upcall only wakes whoever sleeps on
 * nso_wake.  Otherwise, if a ping is outstanding (NSO_PINGING), it reads the
 * reply without blocking, checks that it answers our ping (nso_pingxid),
 * and marks the socket either NSO_VERIFIED or NSO_DEAD (with nso_error set)
 * before waking the sleeper on nso_wake.  When the server reports a program
 * version mismatch and no version has been chosen yet, a mutually supported
 * version is picked from the advertised range and the socket is treated as
 * verified.
 *
 * NSO_UPCALL is held (under nso_lock) for the duration of the read loop so
 * that nfs_socket_destroy() can wait for the upcall to finish.
 */
312 void
313 nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
314 {
315 struct nfs_socket *nso = arg;
316 size_t rcvlen;
317 mbuf_t m;
318 int error = 0, recv = 1;
319
/* still connecting: just nudge the sleeper (note: flag is tested without nso_lock held) */
320 if (nso->nso_flags & NSO_CONNECTING) {
321 NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting\n", nso);
322 wakeup(nso->nso_wake);
323 return;
324 }
325
326 lck_mtx_lock(&nso->nso_lock);
/* ignore the upcall if another upcall is active, the socket is going away, or no ping is outstanding */
327 if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
328 NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n", nso);
329 lck_mtx_unlock(&nso->nso_lock);
330 return;
331 }
332 NFS_SOCK_DBG("nfs connect - socket %p upcall\n", nso);
333 nso->nso_flags |= NSO_UPCALL;
334
335 /* loop while we make error-free progress */
336 while (!error && recv) {
337 /* make sure we're still interested in this socket */
338 if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
339 break;
/* the lock is dropped around the (non-blocking) receive and retaken afterwards */
340 lck_mtx_unlock(&nso->nso_lock);
341 m = NULL;
342 if (nso->nso_sotype == SOCK_STREAM) {
343 error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
344 } else {
/* non-stream socket: receive with a large upper bound on the length */
345 rcvlen = 1000000;
346 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
347 recv = m ? 1 : 0;
348 }
349 lck_mtx_lock(&nso->nso_lock);
350 if (m) {
351 /* match response with request */
352 struct nfsm_chain nmrep;
353 uint32_t reply = 0, rxid = 0, verf_type, verf_len;
354 uint32_t reply_status, rejected_status, accepted_status;
355
/*
 * Parse the RPC reply header: xid, message type, reply status,
 * then either the rejection reason or the verifier + accept status.
 * NOTE(review): nfsmout_if() presumably jumps to the nfsmout label
 * below when error is set - confirm against its definition.
 */
356 nfsm_chain_dissect_init(error, &nmrep, m);
357 nfsm_chain_get_32(error, &nmrep, rxid);
358 nfsm_chain_get_32(error, &nmrep, reply);
/* must be a reply, and it must carry the xid of our ping */
359 if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
360 error = EBADRPC;
361 nfsm_chain_get_32(error, &nmrep, reply_status);
362 if (!error && (reply_status == RPC_MSGDENIED)) {
363 nfsm_chain_get_32(error, &nmrep, rejected_status);
364 if (!error)
365 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
366 }
367 nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
368 nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
369 nfsmout_if(error);
/* skip over the verifier body (verf_type itself is not examined) */
370 if (verf_len)
371 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
372 nfsm_chain_get_32(error, &nmrep, accepted_status);
373 nfsmout_if(error);
/* version mismatch while no version is chosen yet: try to pick one from the advertised range */
374 if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
375 uint32_t minvers, maxvers;
376 nfsm_chain_get_32(error, &nmrep, minvers);
377 nfsm_chain_get_32(error, &nmrep, maxvers);
378 nfsmout_if(error);
379 if (nso->nso_protocol == PMAPPROG) {
/* IPv4 uses PMAPVERS; IPv6 prefers RPCBVERS4 and falls back to RPCBVERS3 */
380 if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
381 error = EPROGMISMATCH;
382 else if ((nso->nso_saddr->sa_family == AF_INET) &&
383 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
384 nso->nso_version = PMAPVERS;
385 else if (nso->nso_saddr->sa_family == AF_INET6) {
386 if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
387 nso->nso_version = RPCBVERS4;
388 else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
389 nso->nso_version = RPCBVERS3;
390 }
391 } else if (nso->nso_protocol == NFS_PROG) {
392 int vers;
393
394 /*
395 * N.B. Both portmapper and rpcbind V3 are happy to return
396 * addresses for other versions than the one you ask (getport or
397 * getaddr) and thus we may have fallen to this code path. So if
398 * we get a version that we support, use highest supported
399 * version. This assumes that the server supports all versions
400 * between minvers and maxvers. Note for IPv6 we will try and
401 * use rpcbind V4 which has getversaddr and we should not get
402 * here if that was successful.
403 */
404 for (vers = nso->nso_nfs_max_vers; vers >= (int)nso->nso_nfs_min_vers; vers--) {
405 if (vers >= (int)minvers && vers <= (int)maxvers)
406 break;
407 }
/* the loop ran off the bottom of our range: no common version (0) */
408 nso->nso_version = (vers < (int)nso->nso_nfs_min_vers) ? 0 : vers;
409 }
/* a usable version was found, so treat the mismatch reply as success */
410 if (!error && nso->nso_version)
411 accepted_status = RPC_SUCCESS;
412 }
/* translate the RPC accept status into an errno value */
413 if (!error) {
414 switch (accepted_status) {
415 case RPC_SUCCESS:
416 error = 0;
417 break;
418 case RPC_PROGUNAVAIL:
419 error = EPROGUNAVAIL;
420 break;
421 case RPC_PROGMISMATCH:
422 error = EPROGMISMATCH;
423 break;
424 case RPC_PROCUNAVAIL:
425 error = EPROCUNAVAIL;
426 break;
427 case RPC_GARBAGE:
428 error = EBADRPC;
429 break;
430 case RPC_SYSTEM_ERR:
431 default:
432 error = EIO;
433 break;
434 }
435 }
436 nfsmout:
/* the ping has been answered one way or the other: record the verdict */
437 nso->nso_flags &= ~NSO_PINGING;
438 if (error) {
439 nso->nso_error = error;
440 nso->nso_flags |= NSO_DEAD;
441 } else {
442 nso->nso_flags |= NSO_VERIFIED;
443 }
444 mbuf_freem(m);
445 /* wake up search thread */
446 wakeup(nso->nso_wake);
447 break;
448 }
449 }
450
451 nso->nso_flags &= ~NSO_UPCALL;
/* EWOULDBLOCK just means nothing more to read; any other error, or a receive that reported no data, kills the socket */
452 if ((error != EWOULDBLOCK) && (error || !recv)) {
453 /* problems with the socket... */
454 nso->nso_error = error ? error : EPIPE;
455 nso->nso_flags |= NSO_DEAD;
456 wakeup(nso->nso_wake);
457 }
/* nfs_socket_destroy() sleeps on &nso->nso_flags while NSO_UPCALL is set */
458 if (nso->nso_flags & NSO_DISCONNECTING)
459 wakeup(&nso->nso_flags);
460 lck_mtx_unlock(&nso->nso_lock);
461 }
462
463 /*
464 * Create/initialize an nfs_socket structure.
465 */
466 int
467 nfs_socket_create(
468 struct nfsmount *nmp,
469 struct sockaddr *sa,
470 int sotype,
471 in_port_t port,
472 uint32_t protocol,
473 uint32_t vers,
474 int resvport,
475 struct nfs_socket **nsop)
476 {
477 struct nfs_socket *nso;
478 struct timeval now;
479 int error;
480 #ifdef NFS_SOCKET_DEBUGGING
481 char naddr[MAX_IPv6_STR_LEN];
482 void *sinaddr;
483
484 if (sa->sa_family == AF_INET)
485 sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
486 else
487 sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
488 if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
489 strlcpy(naddr, "<unknown>", sizeof(naddr));
490 #else
491 char naddr[1] = { 0 };
492 #endif
493
494 *nsop = NULL;
495
496 /* Create the socket. */
497 MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
498 if (nso)
499 MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
500 if (!nso || !nso->nso_saddr) {
501 if (nso)
502 FREE(nso, M_TEMP);
503 return (ENOMEM);
504 }
505 lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
506 nso->nso_sotype = sotype;
507 if (nso->nso_sotype == SOCK_STREAM)
508 nfs_rpc_record_state_init(&nso->nso_rrs);
509 microuptime(&now);
510 nso->nso_timestamp = now.tv_sec;
511 bcopy(sa, nso->nso_saddr, sa->sa_len);
512 if (sa->sa_family == AF_INET)
513 ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
514 else if (sa->sa_family == AF_INET6)
515 ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
516 nso->nso_protocol = protocol;
517 nso->nso_version = vers;
518 nso->nso_nfs_min_vers = PVER2MAJOR(nmp->nm_min_vers);
519 nso->nso_nfs_max_vers = PVER2MAJOR(nmp->nm_max_vers);
520
521 error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);
522
523 /* Some servers require that the client port be a reserved port number. */
524 if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
525 struct sockaddr_storage ss;
526 int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
527 int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
528 int portrange = IP_PORTRANGE_LOW;
529
530 error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
531 if (!error) { /* bind now to check for failure */
532 ss.ss_len = sa->sa_len;
533 ss.ss_family = sa->sa_family;
534 if (ss.ss_family == AF_INET) {
535 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
536 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
537 } else if (ss.ss_family == AF_INET6) {
538 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
539 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
540 } else {
541 error = EINVAL;
542 }
543 if (!error)
544 error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
545 }
546 }
547
548 if (error) {
549 NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
550 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
551 resvport ? "r" : "", port, protocol, vers);
552 nfs_socket_destroy(nso);
553 } else {
554 NFS_SOCK_DBG("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
555 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
556 sotype, resvport ? "r" : "", port, protocol, vers);
557 *nsop = nso;
558 }
559 return (error);
560 }
561
562 /*
563 * Destroy an nfs_socket structure.
564 */
565 void
566 nfs_socket_destroy(struct nfs_socket *nso)
567 {
568 struct timespec ts = { 4, 0 };
569
570 lck_mtx_lock(&nso->nso_lock);
571 nso->nso_flags |= NSO_DISCONNECTING;
572 if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
573 msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
574 lck_mtx_unlock(&nso->nso_lock);
575 sock_shutdown(nso->nso_so, SHUT_RDWR);
576 sock_close(nso->nso_so);
577 if (nso->nso_sotype == SOCK_STREAM)
578 nfs_rpc_record_state_cleanup(&nso->nso_rrs);
579 lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
580 if (nso->nso_saddr)
581 FREE(nso->nso_saddr, M_SONAME);
582 if (nso->nso_saddr2)
583 FREE(nso->nso_saddr2, M_SONAME);
584 NFS_SOCK_DBG("nfs connect - socket %p destroyed\n", nso);
585 FREE(nso, M_TEMP);
586 }
587
588 /*
589 * Set common socket options on an nfs_socket.
590 */
591 void
592 nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
593 {
594 /*
595 * Set socket send/receive timeouts
596 * - Receive timeout shouldn't matter because most receives are performed
597 * in the socket upcall non-blocking.
598 * - Send timeout should allow us to react to a blocked socket.
599 * Soft mounts will want to abort sooner.
600 */
601 struct timeval timeo;
602 int on = 1, proto;
603
604 timeo.tv_usec = 0;
605 timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60;
606 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
607 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
608 if (nso->nso_sotype == SOCK_STREAM) {
609 /* Assume that SOCK_STREAM always requires a connection */
610 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
611 /* set nodelay for TCP */
612 sock_gettype(nso->nso_so, NULL, NULL, &proto);
613 if (proto == IPPROTO_TCP)
614 sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
615 }
616 if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
617 int reserve = NFS_UDPSOCKBUF;
618 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
619 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
620 }
621 /* set SO_NOADDRERR to detect network changes ASAP */
622 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
623 /* just playin' it safe with upcalls */
624 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
625 /* socket should be interruptible if the mount is */
626 if (!NMFLAG(nmp, INTR))
627 sock_nointerrupt(nso->nso_so, 1);
628 }
629
630 /*
631 * Release resources held in an nfs_socket_search.
632 */
633 void
634 nfs_socket_search_cleanup(struct nfs_socket_search *nss)
635 {
636 struct nfs_socket *nso, *nsonext;
637
638 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
639 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
640 nss->nss_sockcnt--;
641 nfs_socket_destroy(nso);
642 }
643 if (nss->nss_sock) {
644 nfs_socket_destroy(nss->nss_sock);
645 nss->nss_sock = NULL;
646 }
647 }
648
/*
 * Prefer returning certain errors over others.
 * This function returns a ranking of the given error; a higher rank means
 * the error is preferred when reporting the outcome of a connect search:
 *	0 - no error
 *	1 - timeouts / try again
 *	2 - network and connection level failures
 *	3 - RPC program/procedure/version not available
 *	4 - malformed RPC
 *	5 - anything else
 */
int
nfs_connect_error_class(int error)
{
	static const struct {
		int	errnum;
		int	rank;
	} ranking[] = {
		{ 0,			0 },
		{ ETIMEDOUT,		1 },
		{ EAGAIN,		1 },
		{ EPIPE,		2 },
		{ EADDRNOTAVAIL,	2 },
		{ ENETDOWN,		2 },
		{ ENETUNREACH,		2 },
		{ ENETRESET,		2 },
		{ ECONNABORTED,		2 },
		{ ECONNRESET,		2 },
		{ EISCONN,		2 },
		{ ENOTCONN,		2 },
		{ ESHUTDOWN,		2 },
		{ ECONNREFUSED,		2 },
		{ EHOSTDOWN,		2 },
		{ EHOSTUNREACH,		2 },
		{ ERPCMISMATCH,		3 },
		{ EPROCUNAVAIL,		3 },
		{ EPROGMISMATCH,	3 },
		{ EPROGUNAVAIL,		3 },
		{ EBADRPC,		4 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(ranking) / sizeof(ranking[0]); i++) {
		if (ranking[i].errnum == error)
			return (ranking[i].rank);
	}
	/* anything not listed above is ranked highest */
	return (5);
}
687
688 /*
689 * Make sure a socket search returns the best error.
690 */
691 void
692 nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
693 {
694 if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
695 nss->nss_error = error;
696 }
697
698 /* nfs_connect_search_new_socket:
699 * Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified
700 * by nss.
701 *
702 * nss_last is set to -1 at initialization to indicate the first time. Its set to -2 if address was found but
703 * could not be used or if a socket timed out.
704 */
705 int
706 nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
707 {
708 struct nfs_fs_location *fsl;
709 struct nfs_fs_server *fss;
710 struct sockaddr_storage ss;
711 struct nfs_socket *nso;
712 char *addrstr;
713 int error = 0;
714
715
716 NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n",
717 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt);
718
719 /*
720 * while there are addresses and:
721 * we have no sockets or
722 * the last address failed and did not produce a socket (nss_last < 0) or
723 * Its been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4)
724 * then attempt to create a socket with the current address.
725 */
726 while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
727 ((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) {
728 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
729 return (EINTR);
730 /* Can we convert the address to a sockaddr? */
731 fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
732 fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
733 addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
734 if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
735 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
736 nss->nss_addrcnt -= 1;
737 nss->nss_last = -2;
738 continue;
739 }
740 /* Check that socket family is acceptable. */
741 if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
742 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
743 nss->nss_addrcnt -= 1;
744 nss->nss_last = -2;
745 continue;
746 }
747
748 /* Create the socket. */
749 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
750 nss->nss_port, nss->nss_protocol, nss->nss_version,
751 ((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
752 if (error)
753 return (error);
754
755 nso->nso_location = nss->nss_nextloc;
756 nso->nso_wake = nss;
757 error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
758 if (error) {
759 lck_mtx_lock(&nso->nso_lock);
760 nso->nso_error = error;
761 nso->nso_flags |= NSO_DEAD;
762 lck_mtx_unlock(&nso->nso_lock);
763 }
764
765 TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
766 nss->nss_sockcnt++;
767 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
768 nss->nss_addrcnt -= 1;
769
770 nss->nss_last = now->tv_sec;
771 }
772
773 if (nss->nss_addrcnt == 0 && nss->nss_last < 0)
774 nss->nss_last = now->tv_sec;
775
776 return (error);
777 }
778
779 /*
780 * nfs_connect_search_socket_connect: Connect an nfs socket nso for nfsmount nmp.
781 * If successful set the socket options for the socket as require from the mount.
782 *
783 * Assumes: nso->nso_lock is held on entry and return.
784 */
785 int
786 nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose)
787 {
788 int error;
789
790 if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
791 /* no connection needed, just say it's already connected */
792 NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n",
793 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
794 nso->nso_flags |= NSO_CONNECTED;
795 nfs_socket_options(nmp, nso);
796 return (1); /* Socket is connected and setup */
797 } else if (!(nso->nso_flags & NSO_CONNECTING)) {
798 /* initiate the connection */
799 nso->nso_flags |= NSO_CONNECTING;
800 lck_mtx_unlock(&nso->nso_lock);
801 NFS_SOCK_DBG("nfs connect %s connecting socket %p\n",
802 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
803 error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
804 lck_mtx_lock(&nso->nso_lock);
805 if (error && (error != EINPROGRESS)) {
806 nso->nso_error = error;
807 nso->nso_flags |= NSO_DEAD;
808 return (0);
809 }
810 }
811 if (nso->nso_flags & NSO_CONNECTING) {
812 /* check the connection */
813 if (sock_isconnected(nso->nso_so)) {
814 NFS_SOCK_DBG("nfs connect %s socket %p is connected\n",
815 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
816 nso->nso_flags &= ~NSO_CONNECTING;
817 nso->nso_flags |= NSO_CONNECTED;
818 nfs_socket_options(nmp, nso);
819 return (1); /* Socket is connected and setup */
820 } else {
821 int optlen = sizeof(error);
822 error = 0;
823 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
824 if (error) { /* we got an error on the socket */
825 NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n",
826 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
827 if (verbose)
828 printf("nfs connect socket error %d for %s\n",
829 error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
830 nso->nso_error = error;
831 nso->nso_flags |= NSO_DEAD;
832 return (0);
833 }
834 }
835 }
836
837 return (0); /* Waiting to be connected */
838 }
839
840 /*
841 * nfs_connect_search_ping: Send a null proc on the nso socket.
842 */
843 int
844 nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now)
845 {
846 /* initiate a NULL RPC request */
847 uint64_t xid = nso->nso_pingxid;
848 mbuf_t m, mreq = NULL;
849 struct msghdr msg;
850 size_t reqlen, sentlen;
851 uint32_t vers = nso->nso_version;
852 int error;
853
854 if (!vers) {
855 if (nso->nso_protocol == PMAPPROG)
856 vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
857 else if (nso->nso_protocol == NFS_PROG)
858 vers = PVER2MAJOR(nmp->nm_max_vers);
859 }
860 lck_mtx_unlock(&nso->nso_lock);
861 error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
862 vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
863 lck_mtx_lock(&nso->nso_lock);
864 if (!error) {
865 nso->nso_flags |= NSO_PINGING;
866 nso->nso_pingxid = R_XID32(xid);
867 nso->nso_reqtimestamp = now->tv_sec;
868 bzero(&msg, sizeof(msg));
869 if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
870 msg.msg_name = nso->nso_saddr;
871 msg.msg_namelen = nso->nso_saddr->sa_len;
872 }
873 for (reqlen=0, m=mreq; m; m = mbuf_next(m))
874 reqlen += mbuf_len(m);
875 lck_mtx_unlock(&nso->nso_lock);
876 error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
877 NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n",
878 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
879 lck_mtx_lock(&nso->nso_lock);
880 if (!error && (sentlen != reqlen))
881 error = ETIMEDOUT;
882 }
883 if (error) {
884 nso->nso_error = error;
885 nso->nso_flags |= NSO_DEAD;
886 return (0);
887 }
888
889 return (1);
890 }
891
892 /*
893 * nfs_connect_search_socket_found: Take the found socket of the socket search list and assign it to the searched socket.
894 * Set the nfs socket protocol and version if needed.
895 */
896 void
897 nfs_connect_search_socket_found(struct nfsmount *nmp, struct nfs_socket_search *nss, struct nfs_socket *nso)
898 {
899 NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
900 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
901 if (!nso->nso_version) {
902 /* If the version isn't set, the default must have worked. */
903 if (nso->nso_protocol == PMAPPROG)
904 nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
905 if (nso->nso_protocol == NFS_PROG)
906 nso->nso_version = PVER2MAJOR(nmp->nm_max_vers);
907 }
908 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
909 nss->nss_sockcnt--;
910 nss->nss_sock = nso;
911 }
912
913 /*
914 * nfs_connect_search_socket_reap: For each socket in the search list mark any timed out socket as dead and remove from
915 * the list. Dead socket are then destroyed.
916 */
917 void
918 nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now)
919 {
920 struct nfs_socket *nso, *nsonext;
921
922 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
923 lck_mtx_lock(&nso->nso_lock);
924 if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
925 /* took too long */
926 NFS_SOCK_DBG("nfs connect %s socket %p timed out\n",
927 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
928 nso->nso_error = ETIMEDOUT;
929 nso->nso_flags |= NSO_DEAD;
930 }
931 if (!(nso->nso_flags & NSO_DEAD)) {
932 lck_mtx_unlock(&nso->nso_lock);
933 continue;
934 }
935 lck_mtx_unlock(&nso->nso_lock);
936 NFS_SOCK_DBG("nfs connect %s reaping socket %p %d\n",
937 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error);
938 nfs_socket_search_update_error(nss, nso->nso_error);
939 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
940 nss->nss_sockcnt--;
941 nfs_socket_destroy(nso);
942 /* If there are more sockets to try, force the starting of another socket */
943 if (nss->nss_addrcnt > 0)
944 nss->nss_last = -2;
945 }
946 }
947
948 /*
949 * nfs_connect_search_check: Check on the status of search and wait for replies if needed.
950 */
951 int
952 nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
953 {
954 int error;
955
956 /* log a warning if connect is taking a while */
957 if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
958 printf("nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
959 nss->nss_flags |= NSS_WARNED;
960 }
961 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
962 return (EINTR);
963 if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
964 return (error);
965
966 /* If we were succesfull at sending a ping, wait up to a second for a reply */
967 if (nss->nss_last >= 0)
968 tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
969
970 return (0);
971 }
972
973
974 /*
975 * Continue the socket search until we have something to report.
976 */
977 int
978 nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
979 {
980 struct nfs_socket *nso;
981 struct timeval now;
982 int error;
983 int verbose = (nss->nss_flags & NSS_VERBOSE);
984
985 loop:
986 microuptime(&now);
987 NFS_SOCK_DBG("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec);
988
989 /* add a new socket to the socket list if needed and available */
990 error = nfs_connect_search_new_socket(nmp, nss, &now);
991 if (error) {
992 NFS_SOCK_DBG("nfs connect returned %d\n", error);
993 return (error);
994 }
995
996 /* check each active socket on the list and try to push it along */
997 TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
998 lck_mtx_lock(&nso->nso_lock);
999
1000 /* If not connected connect it */
1001 if (!(nso->nso_flags & NSO_CONNECTED)) {
1002 if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) {
1003 lck_mtx_unlock(&nso->nso_lock);
1004 continue;
1005 }
1006 }
1007
1008 /* If the socket hasn't been verified or in a ping, ping it. We also handle UDP retransmits */
1009 if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
1010 ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
1011 if (!nfs_connect_search_ping(nmp, nso, &now)) {
1012 lck_mtx_unlock(&nso->nso_lock);
1013 continue;
1014 }
1015 }
1016
1017 /* Has the socket been verified by the up call routine? */
1018 if (nso->nso_flags & NSO_VERIFIED) {
1019 /* WOOHOO!! This socket looks good! */
1020 nfs_connect_search_socket_found(nmp, nss, nso);
1021 lck_mtx_unlock(&nso->nso_lock);
1022 break;
1023 }
1024 lck_mtx_unlock(&nso->nso_lock);
1025 }
1026
1027 /* Check for timed out sockets and mark as dead and then remove all dead sockets. */
1028 nfs_connect_search_socket_reap(nmp, nss, &now);
1029
1030 /*
1031 * Keep looping if we haven't found a socket yet and we have more
1032 * sockets to (continue to) try.
1033 */
1034 error = 0;
1035 if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) {
1036 error = nfs_connect_search_check(nmp, nss, &now);
1037 if (!error)
1038 goto loop;
1039 }
1040
1041 NFS_SOCK_DBG("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
1042 return (error);
1043 }
1044
1045 /*
1046 * Initialize a new NFS connection.
1047 *
1048 * Search for a location to connect a socket to and initialize the connection.
1049 *
1050 * An NFS mount may have multiple locations/servers/addresses available.
1051 * We attempt to connect to each one asynchronously and will start
1052 * several sockets in parallel if other locations are slow to answer.
1053 * We'll use the first NFS socket we can successfully set up.
1054 *
1055 * The search may involve contacting the portmapper service first.
1056 *
1057 * A mount's initial connection may require negotiating some parameters such
1058 * as socket type and NFS version.
1059 */
1060
1061 int
1062 nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
1063 {
1064 struct nfs_socket_search nss;
1065 struct nfs_socket *nso, *nsonfs;
1066 struct sockaddr_storage ss;
1067 struct sockaddr *saddr, *oldsaddr;
1068 sock_upcall upcall;
1069 struct timeval now, start;
1070 int error, savederror, nfsvers;
1071 int tryv4 = 1;
1072 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
1073 fhandle_t *fh = NULL;
1074 char *path = NULL;
1075 in_port_t port;
1076 int addrtotal = 0;
1077
1078 /* paranoia... check that we have at least one address in the locations */
1079 uint32_t loc, serv;
1080 for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) {
1081 for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) {
1082 addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount;
1083 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0)
1084 NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n",
1085 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1086 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name);
1087 }
1088 }
1089
1090 if (addrtotal == 0) {
1091 NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n",
1092 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1093 return (EINVAL);
1094 } else
1095 NFS_SOCK_DBG("nfs connect %s has %d addresses\n",
1096 vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal);
1097
1098 lck_mtx_lock(&nmp->nm_lock);
1099 nmp->nm_sockflags |= NMSOCK_CONNECTING;
1100 nmp->nm_nss = &nss;
1101 lck_mtx_unlock(&nmp->nm_lock);
1102 microuptime(&start);
1103 savederror = error = 0;
1104
1105 tryagain:
1106 /* initialize socket search state */
1107 bzero(&nss, sizeof(nss));
1108 nss.nss_addrcnt = addrtotal;
1109 nss.nss_error = savederror;
1110 TAILQ_INIT(&nss.nss_socklist);
1111 nss.nss_sotype = sotype;
1112 nss.nss_startloc = nmp->nm_locations.nl_current;
1113 nss.nss_timestamp = start.tv_sec;
1114 nss.nss_timeo = timeo;
1115 if (verbose)
1116 nss.nss_flags |= NSS_VERBOSE;
1117
1118 /* First time connecting, we may need to negotiate some things */
1119 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1120 if (!nmp->nm_vers) {
1121 /* No NFS version specified... */
1122 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
1123 if (PVER2MAJOR(nmp->nm_max_vers) >= NFS_VER4 && tryv4) {
1124 nss.nss_port = NFS_PORT;
1125 nss.nss_protocol = NFS_PROG;
1126 nss.nss_version = 4;
1127 nss.nss_flags |= NSS_FALLBACK2PMAP;
1128 } else {
1129 /* ...connect to portmapper first if we (may) need any ports. */
1130 nss.nss_port = PMAPPORT;
1131 nss.nss_protocol = PMAPPROG;
1132 nss.nss_version = 0;
1133 }
1134 } else {
1135 /* ...connect to NFS port first. */
1136 nss.nss_port = nmp->nm_nfsport;
1137 nss.nss_protocol = NFS_PROG;
1138 nss.nss_version = 0;
1139 }
1140 } else if (nmp->nm_vers >= NFS_VER4) {
1141 if (tryv4) {
1142 /* For NFSv4, we use the given (or default) port. */
1143 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
1144 nss.nss_protocol = NFS_PROG;
1145 nss.nss_version = 4;
1146 /*
1147 * set NSS_FALLBACK2PMAP here to pick up any non standard port
1148 * if no port is specified on the mount;
1149 * Note nm_vers is set so we will only try NFS_VER4.
1150 */
1151 if (!nmp->nm_nfsport)
1152 nss.nss_flags |= NSS_FALLBACK2PMAP;
1153 } else {
1154 nss.nss_port = PMAPPORT;
1155 nss.nss_protocol = PMAPPROG;
1156 nss.nss_version = 0;
1157 }
1158 } else {
1159 /* For NFSv3/v2... */
1160 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
1161 /* ...connect to portmapper first if we need any ports. */
1162 nss.nss_port = PMAPPORT;
1163 nss.nss_protocol = PMAPPROG;
1164 nss.nss_version = 0;
1165 } else {
1166 /* ...connect to NFS port first. */
1167 nss.nss_port = nmp->nm_nfsport;
1168 nss.nss_protocol = NFS_PROG;
1169 nss.nss_version = nmp->nm_vers;
1170 }
1171 }
1172 NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n",
1173 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1174 nss.nss_protocol, nss.nss_version);
1175 } else {
1176 /* we've connected before, just connect to NFS port */
1177 if (!nmp->nm_nfsport) {
1178 /* need to ask portmapper which port that would be */
1179 nss.nss_port = PMAPPORT;
1180 nss.nss_protocol = PMAPPROG;
1181 nss.nss_version = 0;
1182 } else {
1183 nss.nss_port = nmp->nm_nfsport;
1184 nss.nss_protocol = NFS_PROG;
1185 nss.nss_version = nmp->nm_vers;
1186 }
1187 NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n",
1188 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1189 nss.nss_protocol, nss.nss_version);
1190 }
1191
1192 /* Set next location to first valid location. */
1193 /* If start location is invalid, find next location. */
1194 nss.nss_nextloc = nss.nss_startloc;
1195 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) ||
1196 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) {
1197 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc);
1198 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) {
1199 NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n",
1200 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1201 return (ENOENT);
1202 }
1203 }
1204 nss.nss_last = -1;
1205
1206 keepsearching:
1207
1208 error = nfs_connect_search_loop(nmp, &nss);
1209 if (error || !nss.nss_sock) {
1210 /* search failed */
1211 nfs_socket_search_cleanup(&nss);
1212 if (nss.nss_flags & NSS_FALLBACK2PMAP) {
1213 tryv4 = 0;
1214 NFS_SOCK_DBG("nfs connect %s TCP failed for V4 %d %d, trying PORTMAP\n",
1215 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error);
1216 goto tryagain;
1217 }
1218
1219 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
1220 /* Try using UDP */
1221 sotype = SOCK_DGRAM;
1222 savederror = nss.nss_error;
1223 NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n",
1224 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error);
1225 goto tryagain;
1226 }
1227 if (!error)
1228 error = nss.nss_error ? nss.nss_error : ETIMEDOUT;
1229 lck_mtx_lock(&nmp->nm_lock);
1230 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1231 nmp->nm_nss = NULL;
1232 lck_mtx_unlock(&nmp->nm_lock);
1233 if (nss.nss_flags & NSS_WARNED)
1234 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n",
1235 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1236 if (fh)
1237 FREE(fh, M_TEMP);
1238 if (path)
1239 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1240 NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n",
1241 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
1242 return (error);
1243 }
1244
1245 /* try to use nss_sock */
1246 nso = nss.nss_sock;
1247 nss.nss_sock = NULL;
1248
1249 /* We may be speaking to portmap first... to determine port(s). */
1250 if (nso->nso_saddr->sa_family == AF_INET)
1251 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
1252 else
1253 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
1254 if (port == PMAPPORT) {
1255 /* Use this portmapper port to get the port #s we need. */
1256 NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n",
1257 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1258
1259 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */
1260 sock_setupcall(nso->nso_so, NULL, NULL);
1261
1262 /* Set up socket address and port for NFS socket. */
1263 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1264
1265 /* If NFS version not set, try nm_max_vers down to nm_min_vers */
1266 nfsvers = nmp->nm_vers ? nmp->nm_vers : PVER2MAJOR(nmp->nm_max_vers);
1267 if (!(port = nmp->nm_nfsport)) {
1268 if (ss.ss_family == AF_INET)
1269 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
1270 else if (ss.ss_family == AF_INET6)
1271 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
1272 for (; nfsvers >= (int)PVER2MAJOR(nmp->nm_min_vers); nfsvers--) {
1273 if (nmp->nm_vers && nmp->nm_vers != nfsvers)
1274 continue; /* Wrong version */
1275 if (nfsvers == NFS_VER4 && nso->nso_sotype == SOCK_DGRAM)
1276 continue; /* NFSv4 does not do UDP */
1277 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1278 nso->nso_so, NFS_PROG, nfsvers,
1279 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
1280 if (!error) {
1281 if (ss.ss_family == AF_INET)
1282 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1283 else if (ss.ss_family == AF_INET6)
1284 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1285 if (!port)
1286 error = EPROGUNAVAIL;
1287 if (port == NFS_PORT && nfsvers == NFS_VER4 && tryv4 == 0)
1288 continue; /* We already tried this */
1289 }
1290 if (!error)
1291 break;
1292 }
1293 if (nfsvers < (int)PVER2MAJOR(nmp->nm_min_vers) && error == 0)
1294 error = EPROGUNAVAIL;
1295 if (error) {
1296 nfs_socket_search_update_error(&nss, error);
1297 nfs_socket_destroy(nso);
1298 goto keepsearching;
1299 }
1300 }
1301 /* Create NFS protocol socket and add it to the list of sockets. */
1302 /* N.B. If nfsvers is NFS_VER4 at this point then we're on a non standard port */
1303 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
1304 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
1305 if (error) {
1306 nfs_socket_search_update_error(&nss, error);
1307 nfs_socket_destroy(nso);
1308 goto keepsearching;
1309 }
1310 nsonfs->nso_location = nso->nso_location;
1311 nsonfs->nso_wake = &nss;
1312 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs);
1313 if (error) {
1314 nfs_socket_search_update_error(&nss, error);
1315 nfs_socket_destroy(nsonfs);
1316 nfs_socket_destroy(nso);
1317 goto keepsearching;
1318 }
1319 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link);
1320 nss.nss_sockcnt++;
1321 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1322 /* Set up socket address and port for MOUNT socket. */
1323 error = 0;
1324 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1325 port = nmp->nm_mountport;
1326 if (ss.ss_family == AF_INET)
1327 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1328 else if (ss.ss_family == AF_INET6)
1329 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1330 if (!port) {
1331 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1332 /* If NFS version is unknown, optimistically choose for NFSv3. */
1333 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1334 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
1335 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1336 nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo);
1337 }
1338 if (!error) {
1339 if (ss.ss_family == AF_INET)
1340 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1341 else if (ss.ss_family == AF_INET6)
1342 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1343 if (!port)
1344 error = EPROGUNAVAIL;
1345 }
1346 /* create sockaddr for MOUNT */
1347 if (!error)
1348 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO);
1349 if (!error && !nsonfs->nso_saddr2)
1350 error = ENOMEM;
1351 if (!error)
1352 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len);
1353 if (error) {
1354 lck_mtx_lock(&nsonfs->nso_lock);
1355 nsonfs->nso_error = error;
1356 nsonfs->nso_flags |= NSO_DEAD;
1357 lck_mtx_unlock(&nsonfs->nso_lock);
1358 }
1359 }
1360 nfs_socket_destroy(nso);
1361 goto keepsearching;
1362 }
1363
1364 /* nso is an NFS socket */
1365 NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
1366
1367 /* If NFS version wasn't specified, it was determined during the connect. */
1368 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version;
1369
1370 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
1371 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1372 error = 0;
1373 saddr = nso->nso_saddr2;
1374 if (!saddr) {
1375 /* Need sockaddr for MOUNT port */
1376 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1377 port = nmp->nm_mountport;
1378 if (ss.ss_family == AF_INET)
1379 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1380 else if (ss.ss_family == AF_INET6)
1381 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1382 if (!port) {
1383 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1384 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1385 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
1386 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1387 NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
1388 if (ss.ss_family == AF_INET)
1389 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1390 else if (ss.ss_family == AF_INET6)
1391 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1392 }
1393 if (!error) {
1394 if (port)
1395 saddr = (struct sockaddr*)&ss;
1396 else
1397 error = EPROGUNAVAIL;
1398 }
1399 }
1400 if (saddr)
1401 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
1402 if (saddr && fh)
1403 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1404 if (!saddr || !fh || !path) {
1405 if (!error)
1406 error = ENOMEM;
1407 if (fh)
1408 FREE(fh, M_TEMP);
1409 if (path)
1410 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1411 fh = NULL;
1412 path = NULL;
1413 nfs_socket_search_update_error(&nss, error);
1414 nfs_socket_destroy(nso);
1415 goto keepsearching;
1416 }
1417 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1);
1418 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers,
1419 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec);
1420 NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n",
1421 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
1422 if (!error) {
1423 /* Make sure we can agree on a security flavor. */
1424 int o, s; /* indices into mount option and server security flavor lists */
1425 int found = 0;
1426
1427 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) {
1428 /* Some servers return an empty list to indicate RPCAUTH_SYS? */
1429 nmp->nm_servsec.count = 1;
1430 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS;
1431 }
1432 if (nmp->nm_sec.count) {
1433 /* Choose the first flavor in our list that the server supports. */
1434 if (!nmp->nm_servsec.count) {
1435 /* we don't know what the server supports, just use our first choice */
1436 nmp->nm_auth = nmp->nm_sec.flavors[0];
1437 found = 1;
1438 }
1439 for (o=0; !found && (o < nmp->nm_sec.count); o++)
1440 for (s=0; !found && (s < nmp->nm_servsec.count); s++)
1441 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) {
1442 nmp->nm_auth = nmp->nm_sec.flavors[o];
1443 found = 1;
1444 }
1445 } else {
1446 /* Choose the first one we support from the server's list. */
1447 if (!nmp->nm_servsec.count) {
1448 nmp->nm_auth = RPCAUTH_SYS;
1449 found = 1;
1450 }
1451 for (s=0; s < nmp->nm_servsec.count; s++)
1452 switch (nmp->nm_servsec.flavors[s]) {
1453 case RPCAUTH_SYS:
1454 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */
1455 if (found && (nmp->nm_auth == RPCAUTH_NONE))
1456 found = 0;
1457 case RPCAUTH_NONE:
1458 case RPCAUTH_KRB5:
1459 case RPCAUTH_KRB5I:
1460 case RPCAUTH_KRB5P:
1461 if (!found) {
1462 nmp->nm_auth = nmp->nm_servsec.flavors[s];
1463 found = 1;
1464 }
1465 break;
1466 }
1467 }
1468 error = !found ? EAUTH : 0;
1469 }
1470 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1471 path = NULL;
1472 if (error) {
1473 nfs_socket_search_update_error(&nss, error);
1474 FREE(fh, M_TEMP);
1475 fh = NULL;
1476 nfs_socket_destroy(nso);
1477 goto keepsearching;
1478 }
1479 if (nmp->nm_fh)
1480 FREE(nmp->nm_fh, M_TEMP);
1481 nmp->nm_fh = fh;
1482 fh = NULL;
1483 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
1484 }
1485
1486 /* put the real upcall in place */
1487 upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
1488 error = sock_setupcall(nso->nso_so, upcall, nmp);
1489 if (error) {
1490 nfs_socket_search_update_error(&nss, error);
1491 nfs_socket_destroy(nso);
1492 goto keepsearching;
1493 }
1494
1495 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1496 /* set mntfromname to this location */
1497 if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
1498 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location,
1499 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1500 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0);
1501 /* some negotiated values need to remain unchanged for the life of the mount */
1502 if (!nmp->nm_sotype)
1503 nmp->nm_sotype = nso->nso_sotype;
1504 if (!nmp->nm_vers) {
1505 nmp->nm_vers = nfsvers;
1506 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
1507 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
1508 if (nso->nso_saddr->sa_family == AF_INET)
1509 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
1510 else if (nso->nso_saddr->sa_family == AF_INET6)
1511 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
1512 else
1513 port = 0;
1514 if (port == NFS_PORT)
1515 nmp->nm_nfsport = NFS_PORT;
1516 }
1517 }
1518 /* do some version-specific pre-mount set up */
1519 if (nmp->nm_vers >= NFS_VER4) {
1520 microtime(&now);
1521 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
1522 if (!NMFLAG(nmp, NOCALLBACK))
1523 nfs4_mount_callback_setup(nmp);
1524 }
1525 }
1526
1527 /* Initialize NFS socket state variables */
1528 lck_mtx_lock(&nmp->nm_lock);
1529 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
1530 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
1531 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
1532 nmp->nm_sdrtt[3] = 0;
1533 if (nso->nso_sotype == SOCK_DGRAM) {
1534 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
1535 nmp->nm_sent = 0;
1536 } else if (nso->nso_sotype == SOCK_STREAM) {
1537 nmp->nm_timeouts = 0;
1538 }
1539 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1540 nmp->nm_sockflags |= NMSOCK_SETUP;
1541 /* move the socket to the mount structure */
1542 nmp->nm_nso = nso;
1543 oldsaddr = nmp->nm_saddr;
1544 nmp->nm_saddr = nso->nso_saddr;
1545 lck_mtx_unlock(&nmp->nm_lock);
1546 error = nfs_connect_setup(nmp);
1547 lck_mtx_lock(&nmp->nm_lock);
1548 nmp->nm_sockflags &= ~NMSOCK_SETUP;
1549 if (!error) {
1550 nmp->nm_sockflags |= NMSOCK_READY;
1551 wakeup(&nmp->nm_sockflags);
1552 }
1553 if (error) {
1554 NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n",
1555 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
1556 nfs_socket_search_update_error(&nss, error);
1557 nmp->nm_saddr = oldsaddr;
1558 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1559 /* undo settings made prior to setup */
1560 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE))
1561 nmp->nm_sotype = 0;
1562 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) {
1563 if (nmp->nm_vers >= NFS_VER4) {
1564 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT))
1565 nmp->nm_nfsport = 0;
1566 if (nmp->nm_cbid)
1567 nfs4_mount_callback_shutdown(nmp);
1568 if (IS_VALID_CRED(nmp->nm_mcred))
1569 kauth_cred_unref(&nmp->nm_mcred);
1570 bzero(&nmp->nm_un, sizeof(nmp->nm_un));
1571 }
1572 nmp->nm_vers = 0;
1573 }
1574 }
1575 lck_mtx_unlock(&nmp->nm_lock);
1576 nmp->nm_nso = NULL;
1577 nfs_socket_destroy(nso);
1578 goto keepsearching;
1579 }
1580
1581 /* update current location */
1582 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) &&
1583 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) {
1584 /* server has changed, we should initiate failover/recovery */
1585 // XXX
1586 }
1587 nmp->nm_locations.nl_current = nso->nso_location;
1588 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID;
1589
1590 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1591 /* We have now successfully connected... make a note of it. */
1592 nmp->nm_sockflags |= NMSOCK_HASCONNECTED;
1593 }
1594
1595 lck_mtx_unlock(&nmp->nm_lock);
1596 if (oldsaddr)
1597 FREE(oldsaddr, M_SONAME);
1598
1599 if (nss.nss_flags & NSS_WARNED)
1600 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n",
1601 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1602
1603 nmp->nm_nss = NULL;
1604 nfs_socket_search_cleanup(&nss);
1605 if (fh)
1606 FREE(fh, M_TEMP);
1607 if (path)
1608 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1609 NFS_SOCK_DBG("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1610 return (0);
1611 }
1612
1613
1614 /* setup & confirm socket connection is functional */
1615 int
1616 nfs_connect_setup(struct nfsmount *nmp)
1617 {
1618 int error = 0;
1619
1620 if (nmp->nm_vers >= NFS_VER4) {
1621 if (nmp->nm_state & NFSSTA_CLIENTID) {
1622 /* first, try to renew our current state */
1623 error = nfs4_renew(nmp, R_SETUP);
1624 if ((error == NFSERR_ADMIN_REVOKED) ||
1625 (error == NFSERR_CB_PATH_DOWN) ||
1626 (error == NFSERR_EXPIRED) ||
1627 (error == NFSERR_LEASE_MOVED) ||
1628 (error == NFSERR_STALE_CLIENTID)) {
1629 lck_mtx_lock(&nmp->nm_lock);
1630 nfs_need_recover(nmp, error);
1631 lck_mtx_unlock(&nmp->nm_lock);
1632 }
1633 }
1634 error = nfs4_setclientid(nmp);
1635 }
1636 return (error);
1637 }
1638
1639 /*
1640 * NFS socket reconnect routine:
1641 * Called when a connection is broken.
1642 * - disconnect the old socket
1643 * - nfs_connect() again
1644 * - set R_MUSTRESEND for all outstanding requests on mount point
1645 * If this fails the mount point is DEAD!
1646 */
1647 int
1648 nfs_reconnect(struct nfsmount *nmp)
1649 {
1650 struct nfsreq *rq;
1651 struct timeval now;
1652 thread_t thd = current_thread();
1653 int error, wentdown = 0, verbose = 1;
1654 time_t lastmsg;
1655 int timeo;
1656
1657 microuptime(&now);
1658 lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);
1659
1660 nfs_disconnect(nmp);
1661
1662
1663 lck_mtx_lock(&nmp->nm_lock);
1664 timeo = nfs_is_squishy(nmp) ? 8 : 30;
1665 lck_mtx_unlock(&nmp->nm_lock);
1666
1667 while ((error = nfs_connect(nmp, verbose, timeo))) {
1668 verbose = 0;
1669 nfs_disconnect(nmp);
1670 if ((error == EINTR) || (error == ERESTART))
1671 return (EINTR);
1672 if (error == EIO)
1673 return (EIO);
1674 microuptime(&now);
1675 if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
1676 lastmsg = now.tv_sec;
1677 nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect", 0);
1678 wentdown = 1;
1679 }
1680 lck_mtx_lock(&nmp->nm_lock);
1681 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
1682 /* we're not yet completely mounted and */
1683 /* we can't reconnect, so we fail */
1684 lck_mtx_unlock(&nmp->nm_lock);
1685 NFS_SOCK_DBG("Not mounted returning %d\n", error);
1686 return (error);
1687 }
1688
1689 if (nfs_mount_check_dead_timeout(nmp)) {
1690 nfs_mount_make_zombie(nmp);
1691 lck_mtx_unlock(&nmp->nm_lock);
1692 return (ENXIO);
1693 }
1694
1695 if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
1696 lck_mtx_unlock(&nmp->nm_lock);
1697 return (error);
1698 }
1699 lck_mtx_unlock(&nmp->nm_lock);
1700 tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay", 2*hz);
1701 if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1702 return (error);
1703 }
1704
1705 if (wentdown)
1706 nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");
1707
1708 /*
1709 * Loop through outstanding request list and mark all requests
1710 * as needing a resend. (Though nfs_need_reconnect() probably
1711 * marked them all already.)
1712 */
1713 lck_mtx_lock(nfs_request_mutex);
1714 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1715 if (rq->r_nmp == nmp) {
1716 lck_mtx_lock(&rq->r_mtx);
1717 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1718 rq->r_flags |= R_MUSTRESEND;
1719 rq->r_rtt = -1;
1720 wakeup(rq);
1721 if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
1722 nfs_asyncio_resend(rq);
1723 }
1724 lck_mtx_unlock(&rq->r_mtx);
1725 }
1726 }
1727 lck_mtx_unlock(nfs_request_mutex);
1728 return (0);
1729 }
1730
1731 /*
1732 * NFS disconnect. Clean up and unlink.
1733 */
1734 void
1735 nfs_disconnect(struct nfsmount *nmp)
1736 {
1737 struct nfs_socket *nso;
1738
1739 lck_mtx_lock(&nmp->nm_lock);
1740 tryagain:
1741 if (nmp->nm_nso) {
1742 struct timespec ts = { 1, 0 };
1743 if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
1744 nmp->nm_state |= NFSSTA_WANTSND;
1745 msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
1746 goto tryagain;
1747 }
1748 if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
1749 msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
1750 goto tryagain;
1751 }
1752 nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
1753 nmp->nm_sockflags &= ~NMSOCK_READY;
1754 nso = nmp->nm_nso;
1755 nmp->nm_nso = NULL;
1756 if (nso->nso_saddr == nmp->nm_saddr)
1757 nso->nso_saddr = NULL;
1758 lck_mtx_unlock(&nmp->nm_lock);
1759 nfs_socket_destroy(nso);
1760 lck_mtx_lock(&nmp->nm_lock);
1761 nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
1762 lck_mtx_unlock(&nmp->nm_lock);
1763 } else {
1764 lck_mtx_unlock(&nmp->nm_lock);
1765 }
1766 }
1767
1768 /*
1769 * mark an NFS mount as needing a reconnect/resends.
1770 */
1771 void
1772 nfs_need_reconnect(struct nfsmount *nmp)
1773 {
1774 struct nfsreq *rq;
1775
1776 lck_mtx_lock(&nmp->nm_lock);
1777 nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
1778 lck_mtx_unlock(&nmp->nm_lock);
1779
1780 /*
1781 * Loop through outstanding request list and
1782 * mark all requests as needing a resend.
1783 */
1784 lck_mtx_lock(nfs_request_mutex);
1785 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1786 if (rq->r_nmp == nmp) {
1787 lck_mtx_lock(&rq->r_mtx);
1788 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1789 rq->r_flags |= R_MUSTRESEND;
1790 rq->r_rtt = -1;
1791 wakeup(rq);
1792 if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
1793 nfs_asyncio_resend(rq);
1794 }
1795 lck_mtx_unlock(&rq->r_mtx);
1796 }
1797 }
1798 lck_mtx_unlock(nfs_request_mutex);
1799 }
1800
1801
1802 /*
1803 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 *
 * arg is the struct nfsmount this thread services.  The thread keeps
 * looping while the socket is not ready, requests are waiting on
 * nm_resendq, nodes are on nm_monlist, a dead timeout is being tracked
 * (nm_deadto_start), state recovery is pending (NFSSTA_RECOVER), or
 * (NFSv4 only) delegations are queued on nm_dreturnq.
 *
 * The loop is left when NMSOCK_UNMOUNT is set, when the dead timeout
 * check fires (the mount is then made a zombie), or when the mount is
 * forced/dead.  On the way out the thread optionally sends the unmount
 * RPC, clears nm_sockthd if it still names this thread, wakes anyone
 * sleeping on nm_sockthd and terminates itself.
 *
 * nm_lock is held at the top of each pass and is dropped around the
 * reconnect, recovery, delegation-return, resend and getattr calls.
1804 */
1805 void
1806 nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
1807 {
1808 struct nfsmount *nmp = arg;
/* msleep timeout; tv_sec is overwritten (1 or 5) before each sleep below */
1809 struct timespec ts = { 30, 0 };
1810 thread_t thd = current_thread();
1811 struct nfsreq *req;
1812 struct timeval now;
1813 int error, dofinish;
1814 nfsnode_t np;
/* counts consecutive EIO/EINTR reconnect failures; non-zero adds a delay before retrying */
1815 int do_reconnect_sleep = 0;
1816
1817 lck_mtx_lock(&nmp->nm_lock);
1818 while (!(nmp->nm_sockflags & NMSOCK_READY) ||
1819 !TAILQ_EMPTY(&nmp->nm_resendq) ||
1820 !LIST_EMPTY(&nmp->nm_monlist) ||
1821 nmp->nm_deadto_start ||
1822 (nmp->nm_state & NFSSTA_RECOVER) ||
1823 ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
1824 {
1825 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
1826 break;
1827 /* do reconnect, if necessary */
1828 if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
/* remember (in uptime seconds) when this run of reconnect attempts began */
1829 if (nmp->nm_reconnect_start <= 0) {
1830 microuptime(&now);
1831 nmp->nm_reconnect_start = now.tv_sec;
1832 }
1833 lck_mtx_unlock(&nmp->nm_lock);
1834 NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1835 /*
1836 * XXX We don't want to call reconnect again right away if returned errors
1837 * before that may not have blocked. This has caused spamming null procs
1838 * from machines in the past.
1839 */
1840 if (do_reconnect_sleep)
1841 tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz);
1842 error = nfs_reconnect(nmp);
1843 if (error) {
/*
 * Log at level 7 by default.  For EIO/EINTR, every 600th consecutive
 * failure (including the first) is logged at level 0 instead, and the
 * failure counter is advanced so the next attempt is delayed.
 */
1844 int lvl = 7;
1845 if (error == EIO || error == EINTR) {
1846 lvl = (do_reconnect_sleep++ % 600) ? 7 : 0;
1847 }
1848 nfs_printf(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n",
1849 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
1850 } else {
/* reconnected: reset the reconnect bookkeeping */
1851 nmp->nm_reconnect_start = 0;
1852 do_reconnect_sleep = 0;
1853 }
1854 lck_mtx_lock(&nmp->nm_lock);
1855 }
1856 if ((nmp->nm_sockflags & NMSOCK_READY) &&
1857 (nmp->nm_state & NFSSTA_RECOVER) &&
1858 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
1859 !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
1860 /* perform state recovery */
1861 lck_mtx_unlock(&nmp->nm_lock);
1862 nfs_recover(nmp);
1863 lck_mtx_lock(&nmp->nm_lock);
1864 }
1865 /* handle NFSv4 delegation returns */
/* NOTE(review): this loop relies on nfs4_delegation_return() taking np off nm_dreturnq (or changing state) to make progress - confirm */
1866 while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) &&
1867 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
1868 ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
1869 lck_mtx_unlock(&nmp->nm_lock);
1870 nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
1871 lck_mtx_lock(&nmp->nm_lock);
1872 }
1873 /* do resends, if necessary/possible */
/* (the queue is also drained when the mount is forced/dead, even without a ready socket) */
1874 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) ||
1875 (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) &&
1876 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
/*
 * Skip over requests whose r_resendtime is still in the future, unless
 * the mount is forced/dead.  "now" is only read when the first request
 * has a non-zero r_resendtime, which is exactly when it was just set.
 */
1877 if (req->r_resendtime)
1878 microuptime(&now);
1879 while (req && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && req->r_resendtime && (now.tv_sec < req->r_resendtime))
1880 req = TAILQ_NEXT(req, r_rchain);
1881 if (!req)
1882 break;
/* take the request off the resend queue and mark it as being on no list */
1883 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
1884 req->r_rchain.tqe_next = NFSREQNOLIST;
1885 lck_mtx_unlock(&nmp->nm_lock);
1886 lck_mtx_lock(&req->r_mtx);
1887 /* Note that we have a reference on the request that was taken in nfs_asyncio_resend */
/* case 1: the request already has an error or a reply - nothing to send, just finish it */
1888 if (req->r_error || req->r_nmrep.nmc_mhead) {
1889 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1890 req->r_flags &= ~R_RESENDQ;
1891 wakeup(req);
1892 lck_mtx_unlock(&req->r_mtx);
1893 if (dofinish)
1894 nfs_asyncio_finish(req);
1895 nfs_request_rele(req);
1896 lck_mtx_lock(&nmp->nm_lock);
1897 continue;
1898 }
/* case 2: the request must be restarted (R_RESTART) or uses GSS - rebuild the header and send it afresh */
1899 if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
1900 req->r_flags &= ~R_RESTART;
1901 req->r_resendtime = 0;
1902 lck_mtx_unlock(&req->r_mtx);
1903 /* async RPCs on GSS mounts need to be rebuilt and resent. */
1904 nfs_reqdequeue(req);
1905 if (nfs_request_using_gss(req)) {
1906 nfs_gss_clnt_rpcdone(req);
1907 error = nfs_gss_clnt_args_restore(req);
1908 if (error == ENEEDAUTH)
1909 req->r_xid = 0;
1910 }
1911 NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
1912 nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
1913 req->r_flags, req->r_rtt);
/* note: any error from nfs_gss_clnt_args_restore() above is overwritten here */
1914 error = nfs_sigintr(nmp, req, req->r_thread, 0);
1915 if (!error)
1916 error = nfs_request_add_header(req);
1917 if (!error)
1918 error = nfs_request_send(req, 0);
1919 lck_mtx_lock(&req->r_mtx);
1920 if (req->r_flags & R_RESENDQ)
1921 req->r_flags &= ~R_RESENDQ;
1922 if (error)
1923 req->r_error = error;
1924 wakeup(req);
/* only run the async completion here if the send failed */
1925 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1926 lck_mtx_unlock(&req->r_mtx);
1927 if (dofinish)
1928 nfs_asyncio_finish(req);
1929 nfs_request_rele(req);
1930 lck_mtx_lock(&nmp->nm_lock);
1931 error = 0;
1932 continue;
1933 }
/* case 3: plain resend of the already-built request */
1934 NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
1935 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
1936 error = nfs_sigintr(nmp, req, req->r_thread, 0);
1937 if (!error) {
1938 req->r_flags |= R_SENDING;
1939 lck_mtx_unlock(&req->r_mtx);
1940 error = nfs_send(req, 0);
1941 lck_mtx_lock(&req->r_mtx);
1942 if (!error) {
/* sent successfully: drop the resend-queue reference and move on */
1943 if (req->r_flags & R_RESENDQ)
1944 req->r_flags &= ~R_RESENDQ;
1945 wakeup(req);
1946 lck_mtx_unlock(&req->r_mtx);
1947 nfs_request_rele(req);
1948 lck_mtx_lock(&nmp->nm_lock);
1949 continue;
1950 }
1951 }
/* interrupted or send failed: record the error and complete the request */
1952 req->r_error = error;
1953 if (req->r_flags & R_RESENDQ)
1954 req->r_flags &= ~R_RESENDQ;
1955 wakeup(req);
1956 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
1957 lck_mtx_unlock(&req->r_mtx);
1958 if (dofinish)
1959 nfs_asyncio_finish(req);
1960 nfs_request_rele(req);
1961 lck_mtx_lock(&nmp->nm_lock);
1962 }
/* give up on the mount once the dead timeout has been reached */
1963 if (nfs_mount_check_dead_timeout(nmp)) {
1964 nfs_mount_make_zombie(nmp);
1965 break;
1966 }
1967
1968 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))
1969 break;
1970 /* check monitored nodes, if necessary/possible */
1971 if (!LIST_EMPTY(&nmp->nm_monlist)) {
/* NFSSTA_MONITOR_SCAN is set for the duration of the scan (an unmounting thread is woken when it ends, see below) */
1972 nmp->nm_state |= NFSSTA_MONITOR_SCAN;
1973 LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
1974 if (!(nmp->nm_sockflags & NMSOCK_READY) ||
1975 (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD)))
1976 break;
/* NMMONSCANINPROG is set on the node while nm_lock is dropped for the getattr */
1977 np->n_mflag |= NMMONSCANINPROG;
1978 lck_mtx_unlock(&nmp->nm_lock);
1979 error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR));
1980 if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */
1981 nfs_data_update_size(np, 0);
1982 lck_mtx_lock(&nmp->nm_lock);
1983 np->n_mflag &= ~NMMONSCANINPROG;
/* wake anyone who asked to be told when this node's scan finished */
1984 if (np->n_mflag & NMMONSCANWANT) {
1985 np->n_mflag &= ~NMMONSCANWANT;
1986 wakeup(&np->n_mflag);
1987 }
1988 if (error || !(nmp->nm_sockflags & NMSOCK_READY) ||
1989 (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD)))
1990 break;
1991 }
1992 nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
1993 if (nmp->nm_state & NFSSTA_UNMOUNTING)
1994 wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
1995 }
/*
 * Sleep until woken via nm_sockthd or the timeout expires: 1 second when
 * there is pending work (dead timeout tracking, queued resends, recovery),
 * otherwise 5 seconds.  No sleep happens when the socket is not ready and
 * neither recovery nor unmounting is flagged; the loop goes straight back
 * to the reconnect attempt.
 */
1996 if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) {
1997 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
1998 (nmp->nm_state & NFSSTA_RECOVER))
1999 ts.tv_sec = 1;
2000 else
2001 ts.tv_sec = 5;
2002 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
2003 }
2004 }
2005
2006 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */
/* (only for mounted, pre-NFSv4 mounts with CALLUMNT set that are not forced/dead) */
2007 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
2008 (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
2009 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
2010 lck_mtx_unlock(&nmp->nm_lock);
/* NOTE(review): the last argument (6 if the socket is ready, else 2) looks like a timeout - confirm against nfs3_umount_rpc() */
2011 nfs3_umount_rpc(nmp, vfs_context_kernel(),
2012 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
2013 lck_mtx_lock(&nmp->nm_lock);
2014 }
2015
/* deregister this thread from the mount (only if it is still the registered one) and exit */
2016 if (nmp->nm_sockthd == thd)
2017 nmp->nm_sockthd = NULL;
2018 lck_mtx_unlock(&nmp->nm_lock);
2019 wakeup(&nmp->nm_sockthd);
2020 thread_terminate(thd);
2021 }
2022
2023 /* start or wake a mount's socket thread */
2024 void
2025 nfs_mount_sock_thread_wake(struct nfsmount *nmp)
2026 {
2027 if (nmp->nm_sockthd)
2028 wakeup(&nmp->nm_sockthd);
2029 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
2030 thread_deallocate(nmp->nm_sockthd);
2031 }
2032
2033 /*
2034 * Check if we should mark the mount dead because the
2035 * unresponsive mount has reached the dead timeout.
2036 * (must be called with nmp locked)
2037 */
2038 int
2039 nfs_mount_check_dead_timeout(struct nfsmount *nmp)
2040 {
2041 struct timeval now;
2042
2043 if (nmp->nm_state & NFSSTA_DEAD)
2044 return 1;
2045 if (nmp->nm_deadto_start == 0)
2046 return 0;
2047 nfs_is_squishy(nmp);
2048 if (nmp->nm_curdeadtimeout <= 0)
2049 return 0;
2050 microuptime(&now);
2051 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout)
2052 return 0;
2053 return 1;
2054 }
2055
2056 /*
2057 * Call nfs_mount_zombie to remove most of the
2058 * nfs state for the mount, and then ask to be forcibly unmounted.
2059 *
2060 * Assumes the nfs mount structure lock nm_lock is held.
2061 */
2062
2063 void
2064 nfs_mount_make_zombie(struct nfsmount *nmp)
2065 {
2066 fsid_t fsid;
2067
2068 if (!nmp)
2069 return;
2070
2071 if (nmp->nm_state & NFSSTA_DEAD)
2072 return;
2073
2074 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
2075 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
2076 fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
2077 lck_mtx_unlock(&nmp->nm_lock);
2078 nfs_mount_zombie(nmp, NFSSTA_DEAD);
2079 vfs_event_signal(&fsid, VQ_DEAD, 0);
2080 lck_mtx_lock(&nmp->nm_lock);
2081 }
2082
2083
2084 /*
2085 * NFS callback channel socket state
 *
 * One of these is allocated by nfs4_cb_accept() for every connection
 * accepted on a callback listening socket, and is passed as the upcall
 * argument to nfs4_cb_rcv().  Entries live on the nfs4_cb_socks list
 * until they are reaped by nfs4_callback_timer() or torn down by
 * nfs4_mount_callback_shutdown().
2086 */
2087 struct nfs_callback_socket
2088 {
/* linkage on the nfs4_cb_socks list */
2089 TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
2090 socket_t ncbs_so; /* the socket */
/* peer address filled in by sock_accept(); compared against each mount's nm_saddr */
2091 struct sockaddr_storage ncbs_saddr; /* socket address */
2092 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */
/* uptime seconds (microuptime); set to 0 when the socket is marked dead */
2093 time_t ncbs_stamp; /* last accessed at */
2094 uint32_t ncbs_flags; /* see below */
2095 };
/* set while nfs4_cb_rcv() is processing an upcall on this socket */
2096 #define NCBSOCK_UPCALL 0x0001
/* set by an upcall that is waiting for the one in progress to finish */
2097 #define NCBSOCK_UPCALLWANT 0x0002
/* socket is unwanted or closed; nfs4_callback_timer() will close and free it */
2098 #define NCBSOCK_DEAD 0x0004
2099
2100 /*
2101 * NFS callback channel state
2102 *
2103 * One listening socket for accepting socket connections from servers and
2104 * a list of connected sockets to handle callback requests on.
2105 * Mounts registered with the callback channel are assigned IDs and
2106 * put on a list so that the callback request handling code can match
2107 * the requests up with mounts.
 *
 * In the functions below these variables are accessed with
 * nfs_global_mutex held, except that nfs4_cb_accept() compares its
 * socket against nfs4_cb_so/nfs4_cb_so6 without taking the mutex.
2108 */
/* IPv4 listening socket (NULL when the callback channel is not set up) */
2109 socket_t nfs4_cb_so = NULL;
/* IPv6 listening socket */
2110 socket_t nfs4_cb_so6 = NULL;
/* port the IPv4 listening socket is bound to (host byte order) */
2111 in_port_t nfs4_cb_port = 0;
/* port the IPv6 listening socket is bound to (host byte order) */
2112 in_port_t nfs4_cb_port6 = 0;
/* next callback ID to hand out; 0 means the lists below are not yet initialized */
2113 uint32_t nfs4_cb_id = 0;
/* number of mounts currently registered with the callback channel */
2114 uint32_t nfs4_cb_so_usecount = 0;
/* connected callback sockets */
2115 TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
/* mounts registered for callbacks (linked through nm_cblink) */
2116 TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;
2117
/* handles one complete callback RPC request received on a callback socket */
2118 int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
2119
2120 /*
2121 * Set up the callback channel for the NFS mount.
2122 *
2123 * Initializes the callback channel socket state and
2124 * assigns a callback ID to the mount.
2125 */
2126 void
2127 nfs4_mount_callback_setup(struct nfsmount *nmp)
2128 {
2129 struct sockaddr_in sin;
2130 struct sockaddr_in6 sin6;
2131 socket_t so = NULL;
2132 socket_t so6 = NULL;
2133 struct timeval timeo;
2134 int error, on = 1;
2135 in_port_t port;
2136
2137 lck_mtx_lock(nfs_global_mutex);
2138 if (nfs4_cb_id == 0) {
2139 TAILQ_INIT(&nfs4_cb_mounts);
2140 TAILQ_INIT(&nfs4_cb_socks);
2141 nfs4_cb_id++;
2142 }
2143 nmp->nm_cbid = nfs4_cb_id++;
2144 if (nmp->nm_cbid == 0)
2145 nmp->nm_cbid = nfs4_cb_id++;
2146 nfs4_cb_so_usecount++;
2147 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
2148
2149 if (nfs4_cb_so) {
2150 lck_mtx_unlock(nfs_global_mutex);
2151 return;
2152 }
2153
2154 /* IPv4 */
2155 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
2156 if (error) {
2157 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
2158 goto fail;
2159 }
2160 so = nfs4_cb_so;
2161
2162 sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2163 sin.sin_len = sizeof(struct sockaddr_in);
2164 sin.sin_family = AF_INET;
2165 sin.sin_addr.s_addr = htonl(INADDR_ANY);
2166 sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
2167 error = sock_bind(so, (struct sockaddr *)&sin);
2168 if (error) {
2169 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
2170 goto fail;
2171 }
2172 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
2173 if (error) {
2174 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
2175 goto fail;
2176 }
2177 nfs4_cb_port = ntohs(sin.sin_port);
2178
2179 error = sock_listen(so, 32);
2180 if (error) {
2181 log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
2182 goto fail;
2183 }
2184
2185 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2186 timeo.tv_usec = 0;
2187 timeo.tv_sec = 60;
2188 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2189 if (error)
2190 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
2191 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2192 if (error)
2193 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
2194 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2195 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2196 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2197 error = 0;
2198
2199 /* IPv6 */
2200 error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
2201 if (error) {
2202 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
2203 goto fail;
2204 }
2205 so6 = nfs4_cb_so6;
2206
2207 sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2208 sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
2209 /* try to use specified port or same port as IPv4 */
2210 port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
2211 ipv6_bind_again:
2212 sin6.sin6_len = sizeof(struct sockaddr_in6);
2213 sin6.sin6_family = AF_INET6;
2214 sin6.sin6_addr = in6addr_any;
2215 sin6.sin6_port = htons(port);
2216 error = sock_bind(so6, (struct sockaddr *)&sin6);
2217 if (error) {
2218 if (port != nfs_callback_port) {
2219 /* if we simply tried to match the IPv4 port, then try any port */
2220 port = 0;
2221 goto ipv6_bind_again;
2222 }
2223 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
2224 goto fail;
2225 }
2226 error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
2227 if (error) {
2228 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
2229 goto fail;
2230 }
2231 nfs4_cb_port6 = ntohs(sin6.sin6_port);
2232
2233 error = sock_listen(so6, 32);
2234 if (error) {
2235 log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
2236 goto fail;
2237 }
2238
2239 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2240 timeo.tv_usec = 0;
2241 timeo.tv_sec = 60;
2242 error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2243 if (error)
2244 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
2245 error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2246 if (error)
2247 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
2248 sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2249 sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2250 sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2251 error = 0;
2252
2253 fail:
2254 if (error) {
2255 nfs4_cb_so = nfs4_cb_so6 = NULL;
2256 lck_mtx_unlock(nfs_global_mutex);
2257 if (so) {
2258 sock_shutdown(so, SHUT_RDWR);
2259 sock_close(so);
2260 }
2261 if (so6) {
2262 sock_shutdown(so6, SHUT_RDWR);
2263 sock_close(so6);
2264 }
2265 } else {
2266 lck_mtx_unlock(nfs_global_mutex);
2267 }
2268 }
2269
2270 /*
2271 * Shut down the callback channel for the NFS mount.
2272 *
2273 * Clears the mount's callback ID and releases the mounts
2274 * reference on the callback socket. Last reference dropped
2275 * will also shut down the callback socket(s).
2276 */
2277 void
2278 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
2279 {
2280 struct nfs_callback_socket *ncbsp;
2281 socket_t so, so6;
2282 struct nfs4_cb_sock_list cb_socks;
2283 struct timespec ts = {1,0};
2284
2285 lck_mtx_lock(nfs_global_mutex);
2286 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
2287 /* wait for any callbacks in progress to complete */
2288 while (nmp->nm_cbrefs)
2289 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
2290 nmp->nm_cbid = 0;
2291 if (--nfs4_cb_so_usecount) {
2292 lck_mtx_unlock(nfs_global_mutex);
2293 return;
2294 }
2295 so = nfs4_cb_so;
2296 so6 = nfs4_cb_so6;
2297 nfs4_cb_so = nfs4_cb_so6 = NULL;
2298 TAILQ_INIT(&cb_socks);
2299 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
2300 lck_mtx_unlock(nfs_global_mutex);
2301 if (so) {
2302 sock_shutdown(so, SHUT_RDWR);
2303 sock_close(so);
2304 }
2305 if (so6) {
2306 sock_shutdown(so6, SHUT_RDWR);
2307 sock_close(so6);
2308 }
2309 while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
2310 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
2311 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2312 sock_close(ncbsp->ncbs_so);
2313 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2314 FREE(ncbsp, M_TEMP);
2315 }
2316 }
2317
2318 /*
2319 * Check periodically for stale/unused nfs callback sockets
2320 */
2321 #define NFS4_CB_TIMER_PERIOD 30
2322 #define NFS4_CB_IDLE_MAX 300
2323 void
2324 nfs4_callback_timer(__unused void *param0, __unused void *param1)
2325 {
2326 struct nfs_callback_socket *ncbsp, *nextncbsp;
2327 struct timeval now;
2328
2329 loop:
2330 lck_mtx_lock(nfs_global_mutex);
2331 if (TAILQ_EMPTY(&nfs4_cb_socks)) {
2332 nfs4_callback_timer_on = 0;
2333 lck_mtx_unlock(nfs_global_mutex);
2334 return;
2335 }
2336 microuptime(&now);
2337 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
2338 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
2339 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
2340 continue;
2341 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
2342 lck_mtx_unlock(nfs_global_mutex);
2343 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2344 sock_close(ncbsp->ncbs_so);
2345 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2346 FREE(ncbsp, M_TEMP);
2347 goto loop;
2348 }
2349 nfs4_callback_timer_on = 1;
2350 nfs_interval_timer_start(nfs4_callback_timer_call,
2351 NFS4_CB_TIMER_PERIOD * 1000);
2352 lck_mtx_unlock(nfs_global_mutex);
2353 }
2354
2355 /*
2356 * Accept a new callback socket.
2357 */
2358 void
2359 nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
2360 {
2361 socket_t newso = NULL;
2362 struct nfs_callback_socket *ncbsp;
2363 struct nfsmount *nmp;
2364 struct timeval timeo, now;
2365 int error, on = 1, ip;
2366
2367 if (so == nfs4_cb_so)
2368 ip = 4;
2369 else if (so == nfs4_cb_so6)
2370 ip = 6;
2371 else
2372 return;
2373
2374 /* allocate/initialize a new nfs_callback_socket */
2375 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
2376 if (!ncbsp) {
2377 log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
2378 return;
2379 }
2380 bzero(ncbsp, sizeof(*ncbsp));
2381 ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
2382 nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);
2383
2384 /* accept a new socket */
2385 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
2386 ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
2387 nfs4_cb_rcv, ncbsp, &newso);
2388 if (error) {
2389 log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
2390 FREE(ncbsp, M_TEMP);
2391 return;
2392 }
2393
2394 /* set up the new socket */
2395 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2396 timeo.tv_usec = 0;
2397 timeo.tv_sec = 60;
2398 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2399 if (error)
2400 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
2401 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2402 if (error)
2403 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
2404 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2405 sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2406 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2407 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2408
2409 ncbsp->ncbs_so = newso;
2410 microuptime(&now);
2411 ncbsp->ncbs_stamp = now.tv_sec;
2412
2413 lck_mtx_lock(nfs_global_mutex);
2414
2415 /* add it to the list */
2416 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
2417
2418 /* verify it's from a host we have mounted */
2419 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2420 /* check if socket's source address matches this mount's server address */
2421 if (!nmp->nm_saddr)
2422 continue;
2423 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2424 break;
2425 }
2426 if (!nmp) /* we don't want this socket, mark it dead */
2427 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2428
2429 /* make sure the callback socket cleanup timer is running */
2430 /* (shorten the timer if we've got a socket we don't want) */
2431 if (!nfs4_callback_timer_on) {
2432 nfs4_callback_timer_on = 1;
2433 nfs_interval_timer_start(nfs4_callback_timer_call,
2434 !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
2435 } else if (!nmp && (nfs4_callback_timer_on < 2)) {
2436 nfs4_callback_timer_on = 2;
2437 thread_call_cancel(nfs4_callback_timer_call);
2438 nfs_interval_timer_start(nfs4_callback_timer_call, 500);
2439 }
2440
2441 lck_mtx_unlock(nfs_global_mutex);
2442 }
2443
2444 /*
2445 * Receive mbufs from callback sockets into RPC records and process each record.
2446 * Detect connection has been closed and shut down.
2447 */
2448 void
2449 nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
2450 {
2451 struct nfs_callback_socket *ncbsp = arg;
2452 struct timespec ts = {1,0};
2453 struct timeval now;
2454 mbuf_t m;
2455 int error = 0, recv = 1;
2456
2457 lck_mtx_lock(nfs_global_mutex);
2458 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
2459 /* wait if upcall is already in progress */
2460 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
2461 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
2462 }
2463 ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
2464 lck_mtx_unlock(nfs_global_mutex);
2465
2466 /* loop while we make error-free progress */
2467 while (!error && recv) {
2468 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
2469 if (m) /* handle the request */
2470 error = nfs4_cb_handler(ncbsp, m);
2471 }
2472
2473 /* note: no error and no data indicates server closed its end */
2474 if ((error != EWOULDBLOCK) && (error || !recv)) {
2475 /*
2476 * Socket is either being closed or should be.
2477 * We can't close the socket in the context of the upcall.
2478 * So we mark it as dead and leave it for the cleanup timer to reap.
2479 */
2480 ncbsp->ncbs_stamp = 0;
2481 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2482 } else {
2483 microuptime(&now);
2484 ncbsp->ncbs_stamp = now.tv_sec;
2485 }
2486
2487 lck_mtx_lock(nfs_global_mutex);
2488 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
2489 lck_mtx_unlock(nfs_global_mutex);
2490 wakeup(ncbsp);
2491 }
2492
2493 /*
2494 * Handle an NFS callback channel request.
2495 */
2496 int
2497 nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
2498 {
2499 socket_t so = ncbsp->ncbs_so;
2500 struct nfsm_chain nmreq, nmrep;
2501 mbuf_t mhead = NULL, mrest = NULL, m;
2502 struct msghdr msg;
2503 struct nfsmount *nmp;
2504 fhandle_t fh;
2505 nfsnode_t np;
2506 nfs_stateid stateid;
2507 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
2508 uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
2509 uint32_t auth_type, auth_len;
2510 uint32_t numres, *pnumres;
2511 int error = 0, replen, len;
2512 size_t sentlen = 0;
2513
2514 xid = numops = op = status = procnum = taglen = cbid = 0;
2515
2516 nfsm_chain_dissect_init(error, &nmreq, mreq);
2517 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID
2518 nfsm_chain_get_32(error, &nmreq, val); // RPC Call
2519 nfsm_assert(error, (val == RPC_CALL), EBADRPC);
2520 nfsm_chain_get_32(error, &nmreq, val); // RPC Version
2521 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
2522 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number
2523 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
2524 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number
2525 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
2526 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number
2527 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
2528
2529 /* Handle authentication */
2530 /* XXX just ignore auth for now - handling kerberos may be tricky */
2531 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor
2532 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length
2533 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2534 if (!error && (auth_len > 0))
2535 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2536 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
2537 nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length
2538 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2539 if (!error && (auth_len > 0))
2540 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2541 if (error) {
2542 status = error;
2543 error = 0;
2544 goto nfsmout;
2545 }
2546
2547 switch (procnum) {
2548 case NFSPROC4_CB_NULL:
2549 status = NFSERR_RETVOID;
2550 break;
2551 case NFSPROC4_CB_COMPOUND:
2552 /* tag, minorversion, cb ident, numops, op array */
2553 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */
2554 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC);
2555
2556 /* start building the body of the response */
2557 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
2558 nfsm_chain_init(&nmrep, mrest);
2559
2560 /* copy tag from request to response */
2561 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */
2562 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
2563 nfsm_chain_get_32(error, &nmreq, val);
2564 nfsm_chain_add_32(error, &nmrep, val);
2565 }
2566
2567 /* insert number of results placeholder */
2568 numres = 0;
2569 nfsm_chain_add_32(error, &nmrep, numres);
2570 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
2571
2572 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */
2573 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
2574 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */
2575 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */
2576 if (error) {
2577 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
2578 status = error;
2579 else if ((error == ENOBUFS) || (error == ENOMEM))
2580 status = NFSERR_RESOURCE;
2581 else
2582 status = NFSERR_SERVERFAULT;
2583 error = 0;
2584 nfsm_chain_null(&nmrep);
2585 goto nfsmout;
2586 }
2587 /* match the callback ID to a registered mount */
2588 lck_mtx_lock(nfs_global_mutex);
2589 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2590 if (nmp->nm_cbid != cbid)
2591 continue;
2592 /* verify socket's source address matches this mount's server address */
2593 if (!nmp->nm_saddr)
2594 continue;
2595 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2596 break;
2597 }
2598 /* mark the NFS mount as busy */
2599 if (nmp)
2600 nmp->nm_cbrefs++;
2601 lck_mtx_unlock(nfs_global_mutex);
2602 if (!nmp) {
2603 /* if no mount match, just drop socket. */
2604 error = EPERM;
2605 nfsm_chain_null(&nmrep);
2606 goto out;
2607 }
2608
2609 /* process ops, adding results to mrest */
2610 while (numops > 0) {
2611 numops--;
2612 nfsm_chain_get_32(error, &nmreq, op);
2613 if (error)
2614 break;
2615 switch (op) {
2616 case NFS_OP_CB_GETATTR:
2617 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
2618 np = NULL;
2619 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2620 bmlen = NFS_ATTR_BITMAP_LEN;
2621 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
2622 if (error) {
2623 status = error;
2624 error = 0;
2625 numops = 0; /* don't process any more ops */
2626 } else {
2627 /* find the node for the file handle */
2628 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2629 if (error || !np) {
2630 status = NFSERR_BADHANDLE;
2631 error = 0;
2632 np = NULL;
2633 numops = 0; /* don't process any more ops */
2634 }
2635 }
2636 nfsm_chain_add_32(error, &nmrep, op);
2637 nfsm_chain_add_32(error, &nmrep, status);
2638 if (!error && (status == EBADRPC))
2639 error = status;
2640 if (np) {
2641 /* only allow returning size, change, and mtime attrs */
2642 NFS_CLEAR_ATTRIBUTES(&rbitmap);
2643 attrbytes = 0;
2644 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
2645 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
2646 attrbytes += 2 * NFSX_UNSIGNED;
2647 }
2648 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
2649 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
2650 attrbytes += 2 * NFSX_UNSIGNED;
2651 }
2652 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2653 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
2654 attrbytes += 3 * NFSX_UNSIGNED;
2655 }
2656 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
2657 nfsm_chain_add_32(error, &nmrep, attrbytes);
2658 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
2659 nfsm_chain_add_64(error, &nmrep,
2660 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
2661 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
2662 nfsm_chain_add_64(error, &nmrep, np->n_size);
2663 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2664 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
2665 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
2666 }
2667 nfs_node_unlock(np);
2668 vnode_put(NFSTOV(np));
2669 np = NULL;
2670 }
2671 /*
2672 * If we hit an error building the reply, we can't easily back up.
2673 * So we'll just update the status and hope the server ignores the
2674 * extra garbage.
2675 */
2676 break;
2677 case NFS_OP_CB_RECALL:
2678 // (STATEID, TRUNCATE, FH) -> (STATUS)
2679 np = NULL;
2680 nfsm_chain_get_stateid(error, &nmreq, &stateid);
2681 nfsm_chain_get_32(error, &nmreq, truncate);
2682 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2683 if (error) {
2684 status = error;
2685 error = 0;
2686 numops = 0; /* don't process any more ops */
2687 } else {
2688 /* find the node for the file handle */
2689 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2690 if (error || !np) {
2691 status = NFSERR_BADHANDLE;
2692 error = 0;
2693 np = NULL;
2694 numops = 0; /* don't process any more ops */
2695 } else if (!(np->n_openflags & N_DELEG_MASK) ||
2696 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
2697 /* delegation stateid state doesn't match */
2698 status = NFSERR_BAD_STATEID;
2699 numops = 0; /* don't process any more ops */
2700 }
2701 if (!status) /* add node to recall queue, and wake socket thread */
2702 nfs4_delegation_return_enqueue(np);
2703 if (np) {
2704 nfs_node_unlock(np);
2705 vnode_put(NFSTOV(np));
2706 }
2707 }
2708 nfsm_chain_add_32(error, &nmrep, op);
2709 nfsm_chain_add_32(error, &nmrep, status);
2710 if (!error && (status == EBADRPC))
2711 error = status;
2712 break;
2713 case NFS_OP_CB_ILLEGAL:
2714 default:
2715 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
2716 status = NFSERR_OP_ILLEGAL;
2717 nfsm_chain_add_32(error, &nmrep, status);
2718 numops = 0; /* don't process any more ops */
2719 break;
2720 }
2721 numres++;
2722 }
2723
2724 if (!status && error) {
2725 if (error == EBADRPC)
2726 status = error;
2727 else if ((error == ENOBUFS) || (error == ENOMEM))
2728 status = NFSERR_RESOURCE;
2729 else
2730 status = NFSERR_SERVERFAULT;
2731 error = 0;
2732 }
2733
2734 /* Now, set the numres field */
2735 *pnumres = txdr_unsigned(numres);
2736 nfsm_chain_build_done(error, &nmrep);
2737 nfsm_chain_null(&nmrep);
2738
2739 /* drop the callback reference on the mount */
2740 lck_mtx_lock(nfs_global_mutex);
2741 nmp->nm_cbrefs--;
2742 if (!nmp->nm_cbid)
2743 wakeup(&nmp->nm_cbrefs);
2744 lck_mtx_unlock(nfs_global_mutex);
2745 break;
2746 }
2747
2748 nfsmout:
2749 if (status == EBADRPC)
2750 OSAddAtomic64(1, &nfsstats.rpcinvalid);
2751
2752 /* build reply header */
2753 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
2754 nfsm_chain_init(&nmrep, mhead);
2755 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
2756 nfsm_chain_add_32(error, &nmrep, xid);
2757 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2758 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
2759 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2760 if (status & NFSERR_AUTHERR) {
2761 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2762 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
2763 } else {
2764 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2765 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2766 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2767 }
2768 } else {
2769 /* reply status */
2770 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2771 /* XXX RPCAUTH_NULL verifier */
2772 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2773 nfsm_chain_add_32(error, &nmrep, 0);
2774 /* accepted status */
2775 switch (status) {
2776 case EPROGUNAVAIL:
2777 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2778 break;
2779 case EPROGMISMATCH:
2780 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2781 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2782 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2783 break;
2784 case EPROCUNAVAIL:
2785 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2786 break;
2787 case EBADRPC:
2788 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2789 break;
2790 default:
2791 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2792 if (status != NFSERR_RETVOID)
2793 nfsm_chain_add_32(error, &nmrep, status);
2794 break;
2795 }
2796 }
2797 nfsm_chain_build_done(error, &nmrep);
2798 if (error) {
2799 nfsm_chain_null(&nmrep);
2800 goto out;
2801 }
2802 error = mbuf_setnext(nmrep.nmc_mcur, mrest);
2803 if (error) {
2804 printf("nfs cb: mbuf_setnext failed %d\n", error);
2805 goto out;
2806 }
2807 mrest = NULL;
2808 /* Calculate the size of the reply */
2809 replen = 0;
2810 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
2811 replen += mbuf_len(m);
2812 mbuf_pkthdr_setlen(mhead, replen);
2813 error = mbuf_pkthdr_setrcvif(mhead, NULL);
2814 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
2815 nfsm_chain_null(&nmrep);
2816
2817 /* send the reply */
2818 bzero(&msg, sizeof(msg));
2819 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
2820 mhead = NULL;
2821 if (!error && ((int)sentlen != replen))
2822 error = EWOULDBLOCK;
2823 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
2824 error = ETIMEDOUT;
2825 out:
2826 if (error)
2827 nfsm_chain_cleanup(&nmrep);
2828 if (mhead)
2829 mbuf_freem(mhead);
2830 if (mrest)
2831 mbuf_freem(mrest);
2832 if (mreq)
2833 mbuf_freem(mreq);
2834 return (error);
2835 }
2836
2837
2838 /*
2839 * Initialize an nfs_rpc_record_state structure.
2840 */
2841 void
2842 nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
2843 {
2844 bzero(nrrsp, sizeof(*nrrsp));
2845 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
2846 }
2847
2848 /*
2849 * Clean up an nfs_rpc_record_state structure.
2850 */
2851 void
2852 nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
2853 {
2854 if (nrrsp->nrrs_m) {
2855 mbuf_freem(nrrsp->nrrs_m);
2856 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2857 }
2858 }
2859
2860 /*
2861 * Read the next (marked) RPC record from the socket.
2862 *
2863 * *recvp returns if any data was received.
2864 * *mp returns the next complete RPC record
2865 */
2866 int
2867 nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
2868 {
2869 struct iovec aio;
2870 struct msghdr msg;
2871 size_t rcvlen;
2872 int error = 0;
2873 mbuf_t m;
2874
2875 *recvp = 0;
2876 *mp = NULL;
2877
2878 /* read the TCP RPC record marker */
2879 while (!error && nrrsp->nrrs_markerleft) {
2880 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
2881 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
2882 aio.iov_len = nrrsp->nrrs_markerleft;
2883 bzero(&msg, sizeof(msg));
2884 msg.msg_iov = &aio;
2885 msg.msg_iovlen = 1;
2886 error = sock_receive(so, &msg, flags, &rcvlen);
2887 if (error || !rcvlen)
2888 break;
2889 *recvp = 1;
2890 nrrsp->nrrs_markerleft -= rcvlen;
2891 if (nrrsp->nrrs_markerleft)
2892 continue;
2893 /* record marker complete */
2894 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
2895 if (nrrsp->nrrs_fragleft & 0x80000000) {
2896 nrrsp->nrrs_lastfrag = 1;
2897 nrrsp->nrrs_fragleft &= ~0x80000000;
2898 }
2899 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
2900 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
2901 /* This is SERIOUS! We are out of sync with the sender. */
2902 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
2903 error = EFBIG;
2904 }
2905 }
2906
2907 /* read the TCP RPC record fragment */
2908 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
2909 m = NULL;
2910 rcvlen = nrrsp->nrrs_fragleft;
2911 error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
2912 if (error || !rcvlen || !m)
2913 break;
2914 *recvp = 1;
2915 /* append mbufs to list */
2916 nrrsp->nrrs_fragleft -= rcvlen;
2917 if (!nrrsp->nrrs_m) {
2918 nrrsp->nrrs_m = m;
2919 } else {
2920 error = mbuf_setnext(nrrsp->nrrs_mlast, m);
2921 if (error) {
2922 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
2923 mbuf_freem(m);
2924 break;
2925 }
2926 }
2927 while (mbuf_next(m))
2928 m = mbuf_next(m);
2929 nrrsp->nrrs_mlast = m;
2930 }
2931
2932 /* done reading fragment? */
2933 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
2934 /* reset socket fragment parsing state */
2935 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
2936 if (nrrsp->nrrs_lastfrag) {
2937 /* RPC record complete */
2938 *mp = nrrsp->nrrs_m;
2939 /* reset socket record parsing state */
2940 nrrsp->nrrs_reclen = 0;
2941 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2942 nrrsp->nrrs_lastfrag = 0;
2943 }
2944 }
2945
2946 return (error);
2947 }
2948
2949
2950
2951 /*
2952 * The NFS client send routine.
2953 *
2954 * Send the given NFS request out the mount's socket.
2955 * Holds nfs_sndlock() for the duration of this call.
2956 *
2957 * - check for request termination (sigintr)
2958 * - wait for reconnect, if necessary
2959 * - UDP: check the congestion window
2960 * - make a copy of the request to send
2961 * - UDP: update the congestion window
2962 * - send the request
2963 *
2964 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
2965 * rexmit count is also updated if this isn't the first send.
2966 *
2967 * If the send is not successful, make sure R_MUSTRESEND is set.
2968 * If this wasn't the first transmit, set R_RESENDERR.
2969 * Also, undo any UDP congestion window changes made.
2970 *
2971 * If the error appears to indicate that the socket should
2972 * be reconnected, mark the socket for reconnection.
2973 *
2974 * Only return errors when the request should be aborted.
 *
 * Parameters:
 *   req  - the request to transmit.  Every return path below clears
 *          R_SENDING in req->r_flags.
 *   wait - nonzero: may sleep waiting for a reconnect or for UDP
 *          congestion window space, and the mbuf copy uses MBUF_WAITOK.
 *          zero: instead of sleeping, the request is flagged
 *          R_MUSTRESEND and 0 is returned.
 *
 * Returns:
 *   0 when the request was sent, or when it could not be sent but
 *   should simply be retried later (R_MUSTRESEND is set).
 *   Early exits return the error from nfs_sndlock()/nfs_sigintr(),
 *   ETIMEDOUT, or EIO.  After a send attempt only EINTR, ERESTART,
 *   EIO, ENXIO and ETIMEDOUT are passed back; anything else becomes 0.
2975 */
2976 int
2977 nfs_send(struct nfsreq *req, int wait)
2978 {
2979 struct nfsmount *nmp;
2980 struct nfs_socket *nso;
2981 int error, error2, sotype, rexmit, slpflag = 0, needrecon;
2982 struct msghdr msg;
2983 struct sockaddr *sendnam;
2984 mbuf_t mreqcopy;
2985 size_t sentlen = 0;
2986 struct timespec ts = { 2, 0 };
2987
/* restart point: re-entered after sleeping for a reconnect or for cwnd space */
2988 again:
2989 error = nfs_sndlock(req);
2990 if (error) {
2991 lck_mtx_lock(&req->r_mtx);
2992 req->r_error = error;
2993 req->r_flags &= ~R_SENDING;
2994 lck_mtx_unlock(&req->r_mtx);
2995 return (error);
2996 }
2997
2998 error = nfs_sigintr(req->r_nmp, req, NULL, 0);
2999 if (error) {
3000 nfs_sndunlock(req);
3001 lck_mtx_lock(&req->r_mtx);
3002 req->r_error = error;
3003 req->r_flags &= ~R_SENDING;
3004 lck_mtx_unlock(&req->r_mtx);
3005 return (error);
3006 }
3007 nmp = req->r_nmp;
3008 sotype = nmp->nm_sotype;
3009
3010 /*
3011 * If it's a setup RPC but we're not in SETUP... must need reconnect.
3012 * If it's a recovery RPC but the socket's not ready... must need reconnect.
3013 */
3014 if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
3015 ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
3016 error = ETIMEDOUT;
3017 nfs_sndunlock(req);
3018 lck_mtx_lock(&req->r_mtx);
3019 req->r_error = error;
3020 req->r_flags &= ~R_SENDING;
3021 lck_mtx_unlock(&req->r_mtx);
3022 return (error);
3023 }
3024
3025 /* If the socket needs reconnection, do that now. */
3026 /* wait until socket is ready - unless this request is part of setup */
3027 lck_mtx_lock(&nmp->nm_lock);
3028 if (!(nmp->nm_sockflags & NMSOCK_READY) &&
3029 !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
3030 if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
3031 slpflag |= PCATCH;
3032 lck_mtx_unlock(&nmp->nm_lock);
3033 nfs_sndunlock(req);
/* non-blocking caller: leave the request flagged for a later resend */
3034 if (!wait) {
3035 lck_mtx_lock(&req->r_mtx);
3036 req->r_flags &= ~R_SENDING;
3037 req->r_flags |= R_MUSTRESEND;
3038 req->r_rtt = 0;
3039 lck_mtx_unlock(&req->r_mtx);
3040 return (0);
3041 }
3042 NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n", req->r_xid);
3043 lck_mtx_lock(&req->r_mtx);
3044 req->r_flags &= ~R_MUSTRESEND;
3045 req->r_rtt = 0;
3046 lck_mtx_unlock(&req->r_mtx);
3047 lck_mtx_lock(&nmp->nm_lock);
3048 while (!(nmp->nm_sockflags & NMSOCK_READY)) {
3049 /* don't bother waiting if the socket thread won't be reconnecting it */
3050 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
3051 error = EIO;
3052 break;
3053 }
3054 if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (nmp->nm_reconnect_start > 0)) {
3055 struct timeval now;
3056 microuptime(&now);
3057 if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
3058 /* soft mount in reconnect for a while... terminate ASAP */
3059 OSAddAtomic64(1, &nfsstats.rpctimeouts);
/* NOTE(review): r_flags/r_error are written here holding nm_lock but not r_mtx */
3060 req->r_flags |= R_SOFTTERM;
3061 req->r_error = error = ETIMEDOUT;
3062 break;
3063 }
3064 }
3065 /* make sure socket thread is running, then wait */
3066 nfs_mount_sock_thread_wake(nmp);
3067 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
3068 break;
3069 msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
/* PCATCH (if it was set) only applies to the first sleep */
3070 slpflag = 0;
3071 }
3072 lck_mtx_unlock(&nmp->nm_lock);
3073 if (error) {
3074 lck_mtx_lock(&req->r_mtx);
3075 req->r_error = error;
3076 req->r_flags &= ~R_SENDING;
3077 lck_mtx_unlock(&req->r_mtx);
3078 return (error);
3079 }
3080 goto again;
3081 }
3082 nso = nmp->nm_nso;
3083 /* note that we're using the mount's socket to do the send */
3084 nmp->nm_state |= NFSSTA_SENDING; /* will be cleared by nfs_sndunlock() */
3085 lck_mtx_unlock(&nmp->nm_lock);
3086 if (!nso) {
3087 nfs_sndunlock(req);
3088 lck_mtx_lock(&req->r_mtx);
3089 req->r_flags &= ~R_SENDING;
3090 req->r_flags |= R_MUSTRESEND;
3091 req->r_rtt = 0;
3092 lck_mtx_unlock(&req->r_mtx);
3093 return (0);
3094 }
3095
3096 lck_mtx_lock(&req->r_mtx);
/* nonzero if this request has already been sent at least once */
3097 rexmit = (req->r_flags & R_SENT);
3098
3099 if (sotype == SOCK_DGRAM) {
3100 lck_mtx_lock(&nmp->nm_lock);
3101 if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
3102 /* if we can't send this out yet, wait on the cwnd queue */
3103 slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
3104 lck_mtx_unlock(&nmp->nm_lock);
3105 nfs_sndunlock(req);
3106 req->r_flags &= ~R_SENDING;
3107 req->r_flags |= R_MUSTRESEND;
3108 lck_mtx_unlock(&req->r_mtx);
3109 if (!wait) {
/* NOTE(review): unlike the other early returns, r_rtt is reset here after r_mtx was dropped */
3110 req->r_rtt = 0;
3111 return (0);
3112 }
3113 lck_mtx_lock(&nmp->nm_lock);
3114 while (nmp->nm_sent >= nmp->nm_cwnd) {
3115 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
3116 break;
3117 TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
3118 msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
3119 slpflag = 0;
/* still queued after waking (nobody dequeued us before the wakeup): take ourselves off */
3120 if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
3121 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
3122 req->r_cchain.tqe_next = NFSREQNOLIST;
3123 }
3124 }
3125 lck_mtx_unlock(&nmp->nm_lock);
/* any nfs_sigintr() error from the loop is re-evaluated at the top of "again" */
3126 goto again;
3127 }
3128 /*
3129 * We update these *before* the send to avoid racing
3130 * against others who may be looking to send requests.
3131 */
3132 if (!rexmit) {
3133 /* first transmit */
3134 req->r_flags |= R_CWND;
3135 nmp->nm_sent += NFS_CWNDSCALE;
3136 } else {
3137 /*
3138 * When retransmitting, turn timing off
3139 * and divide congestion window by 2.
3140 */
3141 req->r_flags &= ~R_TIMING;
3142 nmp->nm_cwnd >>= 1;
3143 if (nmp->nm_cwnd < NFS_CWNDSCALE)
3144 nmp->nm_cwnd = NFS_CWNDSCALE;
3145 }
3146 lck_mtx_unlock(&nmp->nm_lock);
3147 }
3148
3149 req->r_flags &= ~R_MUSTRESEND;
3150 lck_mtx_unlock(&req->r_mtx);
3151
/* send a copy so the original chain (r_mhead) stays available for retransmits */
3152 error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
3153 wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
3154 if (error) {
3155 if (wait)
3156 log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
3157 nfs_sndunlock(req);
3158 lck_mtx_lock(&req->r_mtx);
3159 req->r_flags &= ~R_SENDING;
3160 req->r_flags |= R_MUSTRESEND;
3161 req->r_rtt = 0;
3162 lck_mtx_unlock(&req->r_mtx);
3163 return (0);
3164 }
3165
3166 bzero(&msg, sizeof(msg));
/* only an unconnected non-stream socket is given an explicit destination address */
3167 if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
3168 msg.msg_name = (caddr_t)sendnam;
3169 msg.msg_namelen = sendnam->sa_len;
3170 }
3171 error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
3172 if (error || (sentlen != req->r_mreqlen)) {
3173 NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n",
3174 req->r_xid, (int)sentlen, (int)req->r_mreqlen, error);
3175 }
3176
/* a short send with no error reported is treated as EWOULDBLOCK */
3177 if (!error && (sentlen != req->r_mreqlen))
3178 error = EWOULDBLOCK;
/* a partial send on a stream socket always requests a reconnect (presumably the record stream is no longer in sync) */
3179 needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));
3180
3181 lck_mtx_lock(&req->r_mtx);
3182 req->r_flags &= ~R_SENDING;
3183 req->r_rtt = 0;
/* the retransmit counter is bumped for every resend attempt and capped at NFS_MAXREXMIT */
3184 if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
3185 req->r_rexmit = NFS_MAXREXMIT;
3186
3187 if (!error) {
3188 /* SUCCESS */
3189 req->r_flags &= ~R_RESENDERR;
3190 if (rexmit)
3191 OSAddAtomic64(1, &nfsstats.rpcretries);
3192 req->r_flags |= R_SENT;
3193 if (req->r_flags & R_WAITSENT) {
3194 req->r_flags &= ~R_WAITSENT;
3195 wakeup(req);
3196 }
3197 nfs_sndunlock(req);
3198 lck_mtx_unlock(&req->r_mtx);
3199 return (0);
3200 }
3201
3202 /* send failed */
3203 req->r_flags |= R_MUSTRESEND;
3204 if (rexmit)
3205 req->r_flags |= R_RESENDERR;
3206 if ((error == EINTR) || (error == ERESTART))
3207 req->r_error = error;
3208 lck_mtx_unlock(&req->r_mtx);
3209
3210 if (sotype == SOCK_DGRAM) {
3211 /*
3212 * Note: even though a first send may fail, we consider
3213 * the request sent for congestion window purposes.
3214 * So we don't need to undo any of the changes made above.
3215 */
3216 /*
3217 * Socket errors ignored for connectionless sockets??
3218 * For now, ignore them all
3219 */
3220 if ((error != EINTR) && (error != ERESTART) &&
3221 (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
3222 int clearerror = 0, optlen = sizeof(clearerror);
/* fetch SO_ERROR from the socket; the value is only used for the debug message below */
3223 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
3224 #ifdef NFS_SOCKET_DEBUGGING
3225 if (clearerror)
3226 NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n",
3227 error, clearerror);
3228 #endif
3229 }
3230 }
3231
3232 /* check if it appears we should reconnect the socket */
3233 switch (error) {
3234 case EWOULDBLOCK:
3235 /* if send timed out, reconnect if on TCP */
3236 if (sotype != SOCK_STREAM)
3237 break;
/* FALLTHROUGH: on TCP, EWOULDBLOCK is handled like the connection errors below */
3238 case EPIPE:
3239 case EADDRNOTAVAIL:
3240 case ENETDOWN:
3241 case ENETUNREACH:
3242 case ENETRESET:
3243 case ECONNABORTED:
3244 case ECONNRESET:
3245 case ENOTCONN:
3246 case ESHUTDOWN:
3247 case ECONNREFUSED:
3248 case EHOSTDOWN:
3249 case EHOSTUNREACH:
3250 needrecon = 1;
3251 break;
3252 }
/* only flag a reconnect if the mount is still using the socket we sent on */
3253 if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
3254 NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error);
3255 nfs_need_reconnect(nmp);
3256 }
3257
3258 nfs_sndunlock(req);
3259
3260 if (nfs_is_dead(error, nmp))
3261 error = EIO;
3262
3263 /*
3264 * Don't log some errors:
3265 * EPIPE errors may be common with servers that drop idle connections.
3266 * EADDRNOTAVAIL may occur on network transitions.
3267 * ENOTCONN may occur under some network conditions.
3268 */
3269 if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
3270 error = 0;
3271 if (error && (error != EINTR) && (error != ERESTART))
3272 log(LOG_INFO, "nfs send error %d for server %s\n", error,
3273 !req->r_nmp ? "<unmounted>" :
3274 vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
3275
3276 /* prefer request termination error over other errors */
3277 error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
3278 if (error2)
3279 error = error2;
3280
3281 /* only allow the following errors to be returned */
3282 if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
3283 (error != ENXIO) && (error != ETIMEDOUT))
3284 error = 0;
3285 return (error);
3286 }
3287
3288 /*
3289 * NFS client socket upcalls
3290 *
3291 * Pull RPC replies out of an NFS mount's socket and match them
3292 * up with the pending request.
3293 *
3294 * The datagram code is simple because we always get whole
3295 * messages out of the socket.
3296 *
3297 * The stream code is more involved because we have to parse
3298 * the RPC records out of the stream.
3299 */
3300
3301 /* NFS client UDP socket upcall */
3302 void
3303 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
3304 {
3305 struct nfsmount *nmp = arg;
3306 struct nfs_socket *nso = nmp->nm_nso;
3307 size_t rcvlen;
3308 mbuf_t m;
3309 int error = 0;
3310
3311 if (nmp->nm_sockflags & NMSOCK_CONNECTING)
3312 return;
3313
3314 do {
3315 /* make sure we're on the current socket */
3316 if (!nso || (nso->nso_so != so))
3317 return;
3318
3319 m = NULL;
3320 rcvlen = 1000000;
3321 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
3322 if (m)
3323 nfs_request_match_reply(nmp, m);
3324 } while (m && !error);
3325
3326 if (error && (error != EWOULDBLOCK)) {
3327 /* problems with the socket... mark for reconnection */
3328 NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n", error);
3329 nfs_need_reconnect(nmp);
3330 }
3331 }
3332
3333 /* NFS client TCP socket upcall */
3334 void
3335 nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
3336 {
3337 struct nfsmount *nmp = arg;
3338 struct nfs_socket *nso = nmp->nm_nso;
3339 struct nfs_rpc_record_state nrrs;
3340 mbuf_t m;
3341 int error = 0;
3342 int recv = 1;
3343 int wup = 0;
3344
3345 if (nmp->nm_sockflags & NMSOCK_CONNECTING)
3346 return;
3347
3348 /* make sure we're on the current socket */
3349 lck_mtx_lock(&nmp->nm_lock);
3350 nso = nmp->nm_nso;
3351 if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
3352 lck_mtx_unlock(&nmp->nm_lock);
3353 return;
3354 }
3355 lck_mtx_unlock(&nmp->nm_lock);
3356
3357 /* make sure this upcall should be trying to do work */
3358 lck_mtx_lock(&nso->nso_lock);
3359 if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
3360 lck_mtx_unlock(&nso->nso_lock);
3361 return;
3362 }
3363 nso->nso_flags |= NSO_UPCALL;
3364 nrrs = nso->nso_rrs;
3365 lck_mtx_unlock(&nso->nso_lock);
3366
3367 /* loop while we make error-free progress */
3368 while (!error && recv) {
3369 error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
3370 if (m) /* match completed response with request */
3371 nfs_request_match_reply(nmp, m);
3372 }
3373
3374 /* Update the sockets's rpc parsing state */
3375 lck_mtx_lock(&nso->nso_lock);
3376 nso->nso_rrs = nrrs;
3377 if (nso->nso_flags & NSO_DISCONNECTING)
3378 wup = 1;
3379 nso->nso_flags &= ~NSO_UPCALL;
3380 lck_mtx_unlock(&nso->nso_lock);
3381 if (wup)
3382 wakeup(&nso->nso_flags);
3383
3384 #ifdef NFS_SOCKET_DEBUGGING
3385 if (!recv && (error != EWOULDBLOCK))
3386 NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error);
3387 #endif
3388 /* note: no error and no data indicates server closed its end */
3389 if ((error != EWOULDBLOCK) && (error || !recv)) {
3390 /* problems with the socket... mark for reconnection */
3391 NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n", error);
3392 nfs_need_reconnect(nmp);
3393 }
3394 }
3395
3396 /*
3397 * "poke" a socket to try to provoke any pending errors
3398 */
3399 void
3400 nfs_sock_poke(struct nfsmount *nmp)
3401 {
3402 struct iovec aio;
3403 struct msghdr msg;
3404 size_t len;
3405 int error = 0;
3406 int dummy;
3407
3408 lck_mtx_lock(&nmp->nm_lock);
3409 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
3410 !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
3411 /* Nothing to poke */
3412 nmp->nm_sockflags &= ~NMSOCK_POKE;
3413 wakeup(&nmp->nm_sockflags);
3414 lck_mtx_unlock(&nmp->nm_lock);
3415 return;
3416 }
3417 lck_mtx_unlock(&nmp->nm_lock);
3418 aio.iov_base = &dummy;
3419 aio.iov_len = 0;
3420 len = 0;
3421 bzero(&msg, sizeof(msg));
3422 msg.msg_iov = &aio;
3423 msg.msg_iovlen = 1;
3424 error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
3425 NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error);
3426 lck_mtx_lock(&nmp->nm_lock);
3427 nmp->nm_sockflags &= ~NMSOCK_POKE;
3428 wakeup(&nmp->nm_sockflags);
3429 lck_mtx_unlock(&nmp->nm_lock);
3430 nfs_is_dead(error, nmp);
3431 }
3432
3433 /*
3434 * Match an RPC reply with the corresponding request
 *
 * nmp  - the mount whose socket the reply arrived on
 * mrep - mbuf chain holding the reply.  It is freed here if it is not
 *        a well-formed RPC reply or no pending request has its xid;
 *        otherwise the dissected chain is stored in the matching
 *        request's r_nmrep.
 *
 * On a match: for datagram sockets the congestion window is updated,
 * the round-trip estimates are updated for timed requests, nm_timeouts
 * is reset, waiters on the request are woken and, if the request has a
 * callback, it is passed to nfs_asyncio_finish().
 *
 * Locks are taken in the order nfs_request_mutex, req->r_mtx,
 * nmp->nm_lock.
3435 */
3436 void
3437 nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
3438 {
3439 struct nfsreq *req;
3440 struct nfsm_chain nmrep;
3441 u_int32_t reply = 0, rxid = 0;
3442 int error = 0, asyncioq, t1;
3443
3444 /* Get the xid and check that it is an rpc reply */
3445 nfsm_chain_dissect_init(error, &nmrep, mrep);
3446 nfsm_chain_get_32(error, &nmrep, rxid);
3447 nfsm_chain_get_32(error, &nmrep, reply);
3448 if (error || (reply != RPC_REPLY)) {
3449 OSAddAtomic64(1, &nfsstats.rpcinvalid);
3450 mbuf_freem(mrep);
3451 return;
3452 }
3453
3454 /*
3455 * Loop through the request list to match up the reply
3456 * Iff no match, just drop it.
3457 */
3458 lck_mtx_lock(nfs_request_mutex);
3459 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
/* cheap unlocked pre-check: skip requests that already have a reply or a different xid */
3460 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
3461 continue;
3462 /* looks like we have it, grab lock and double check */
3463 lck_mtx_lock(&req->r_mtx);
3464 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
3465 lck_mtx_unlock(&req->r_mtx);
3466 continue;
3467 }
3468 /* Found it.. */
3469 req->r_nmrep = nmrep;
3470 lck_mtx_lock(&nmp->nm_lock);
3471 if (nmp->nm_sotype == SOCK_DGRAM) {
3472 /*
3473 * Update congestion window.
3474 * Do the additive increase of one rpc/rtt.
3475 */
3476 FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3477 if (nmp->nm_cwnd <= nmp->nm_sent) {
3478 nmp->nm_cwnd +=
3479 ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
3480 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
3481 if (nmp->nm_cwnd > NFS_MAXCWND)
3482 nmp->nm_cwnd = NFS_MAXCWND;
3483 }
/* this request no longer counts against the window */
3484 if (req->r_flags & R_CWND) {
3485 nmp->nm_sent -= NFS_CWNDSCALE;
3486 req->r_flags &= ~R_CWND;
3487 }
3488 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3489 /* congestion window is open, poke the cwnd queue */
3490 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3491 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3492 req2->r_cchain.tqe_next = NFSREQNOLIST;
3493 wakeup(req2);
3494 }
3495 }
3496 /*
3497 * Update rtt using a gain of 0.125 on the mean
3498 * and a gain of 0.25 on the deviation.
3499 */
3500 if (req->r_flags & R_TIMING) {
3501 /*
3502 * Since the timer resolution of
3503 * NFS_HZ is so coarse, it can often
3504 * result in r_rtt == 0. Since
3505 * r_rtt == N means that the actual
3506 * rtt is between N+dt and N+2-dt ticks,
3507 * add 1.
3508 */
3509 if (proct[req->r_procnum] == 0)
3510 panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
3511 t1 = req->r_rtt + 1;
3512 t1 -= (NFS_SRTT(req) >> 3);
3513 NFS_SRTT(req) += t1;
3514 if (t1 < 0)
3515 t1 = -t1;
3516 t1 -= (NFS_SDRTT(req) >> 2);
3517 NFS_SDRTT(req) += t1;
3518 }
3519 nmp->nm_timeouts = 0;
3520 lck_mtx_unlock(&nmp->nm_lock);
3521 /* signal anyone waiting on this request */
3522 wakeup(req);
/* sample the callback pointer while r_mtx is still held; it is acted on after the locks are dropped */
3523 asyncioq = (req->r_callback.rcb_func != NULL);
3524 if (nfs_request_using_gss(req))
3525 nfs_gss_clnt_rpcdone(req);
3526 lck_mtx_unlock(&req->r_mtx);
3527 lck_mtx_unlock(nfs_request_mutex);
3528 /* if it's an async RPC with a callback, queue it up */
3529 if (asyncioq)
3530 nfs_asyncio_finish(req);
3531 break;
3532 }
3533
/* req is NULL only if the loop ran off the end of the list, in which case nfs_request_mutex is still held */
3534 if (!req) {
3535 /* not matched to a request, so drop it. */
3536 lck_mtx_unlock(nfs_request_mutex);
3537 OSAddAtomic64(1, &nfsstats.rpcunexpected);
3538 mbuf_freem(mrep);
3539 }
3540 }
3541
3542 /*
3543 * Wait for the reply for a given request...
3544 * ...potentially resending the request if necessary.
3545 */
3546 int
3547 nfs_wait_reply(struct nfsreq *req)
3548 {
3549 struct timespec ts = { 2, 0 };
3550 int error = 0, slpflag, first = 1;
3551
3552 if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
3553 slpflag = PCATCH;
3554 else
3555 slpflag = 0;
3556
3557 lck_mtx_lock(&req->r_mtx);
3558 while (!req->r_nmrep.nmc_mhead) {
3559 if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
3560 break;
3561 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
3562 break;
3563 /* check if we need to resend */
3564 if (req->r_flags & R_MUSTRESEND) {
3565 NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
3566 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
3567 req->r_flags |= R_SENDING;
3568 lck_mtx_unlock(&req->r_mtx);
3569 if (nfs_request_using_gss(req)) {
3570 /*
3571 * It's an RPCSEC_GSS request.
3572 * Can't just resend the original request
3573 * without bumping the cred sequence number.
3574 * Go back and re-build the request.
3575 */
3576 lck_mtx_lock(&req->r_mtx);
3577 req->r_flags &= ~R_SENDING;
3578 lck_mtx_unlock(&req->r_mtx);
3579 return (EAGAIN);
3580 }
3581 error = nfs_send(req, 1);
3582 lck_mtx_lock(&req->r_mtx);
3583 NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
3584 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error);
3585 if (error)
3586 break;
3587 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
3588 break;
3589 }
3590 /* need to poll if we're P_NOREMOTEHANG */
3591 if (nfs_noremotehang(req->r_thread))
3592 ts.tv_sec = 1;
3593 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
3594 first = slpflag = 0;
3595 }
3596 lck_mtx_unlock(&req->r_mtx);
3597
3598 return (error);
3599 }
3600
3601 /*
3602 * An NFS request goes something like this:
3603 * (nb: always frees up mreq mbuf list)
3604 * nfs_request_create()
3605 * - allocates a request struct if one is not provided
3606 * - initial fill-in of the request struct
3607 * nfs_request_add_header()
3608 * - add the RPC header
3609 * nfs_request_send()
3610 * - link it into list
3611 * - call nfs_send() for first transmit
3612 * nfs_request_wait()
3613 * - call nfs_wait_reply() to wait for the reply
3614 * nfs_request_finish()
3615 * - break down rpc header and return with error or nfs reply
3616 * pointed to by nmrep.
3617 * nfs_request_rele()
3618 * nfs_request_destroy()
3619 * - clean up the request struct
3620 * - free the request struct if it was allocated by nfs_request_create()
3621 */
3622
3623 /*
3624 * Set up an NFS request struct (allocating if no request passed in).
3625 */
3626 int
3627 nfs_request_create(
3628 nfsnode_t np,
3629 mount_t mp, /* used only if !np */
3630 struct nfsm_chain *nmrest,
3631 int procnum,
3632 thread_t thd,
3633 kauth_cred_t cred,
3634 struct nfsreq **reqp)
3635 {
3636 struct nfsreq *req, *newreq = NULL;
3637 struct nfsmount *nmp;
3638
3639 req = *reqp;
3640 if (!req) {
3641 /* allocate a new NFS request structure */
3642 MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
3643 if (!newreq) {
3644 mbuf_freem(nmrest->nmc_mhead);
3645 nmrest->nmc_mhead = NULL;
3646 return (ENOMEM);
3647 }
3648 req = newreq;
3649 }
3650
3651 bzero(req, sizeof(*req));
3652 if (req == newreq)
3653 req->r_flags = R_ALLOCATED;
3654
3655 nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
3656 if (nfs_mount_gone(nmp)) {
3657 if (newreq)
3658 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
3659 return (ENXIO);
3660 }
3661 lck_mtx_lock(&nmp->nm_lock);
3662 if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) &&
3663 (nmp->nm_state & NFSSTA_TIMEO)) {
3664 lck_mtx_unlock(&nmp->nm_lock);
3665 mbuf_freem(nmrest->nmc_mhead);
3666 nmrest->nmc_mhead = NULL;
3667 if (newreq)
3668 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
3669 return (ENXIO);
3670 }
3671
3672 if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
3673 OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
3674 if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
3675 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
3676
3677 lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
3678 req->r_nmp = nmp;
3679 nmp->nm_ref++;
3680 req->r_np = np;
3681 req->r_thread = thd;
3682 if (!thd)
3683 req->r_flags |= R_NOINTR;
3684 if (IS_VALID_CRED(cred)) {
3685 kauth_cred_ref(cred);
3686 req->r_cred = cred;
3687 }
3688 req->r_procnum = procnum;
3689 if (proct[procnum] > 0)
3690 req->r_flags |= R_TIMING;
3691 req->r_nmrep.nmc_mhead = NULL;
3692 SLIST_INIT(&req->r_gss_seqlist);
3693 req->r_achain.tqe_next = NFSREQNOLIST;
3694 req->r_rchain.tqe_next = NFSREQNOLIST;
3695 req->r_cchain.tqe_next = NFSREQNOLIST;
3696
3697 /* set auth flavor to use for request */
3698 if (!req->r_cred)
3699 req->r_auth = RPCAUTH_NONE;
3700 else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
3701 req->r_auth = req->r_np->n_auth;
3702 else
3703 req->r_auth = nmp->nm_auth;
3704
3705 lck_mtx_unlock(&nmp->nm_lock);
3706
3707 /* move the request mbuf chain to the nfsreq */
3708 req->r_mrest = nmrest->nmc_mhead;
3709 nmrest->nmc_mhead = NULL;
3710
3711 req->r_flags |= R_INITTED;
3712 req->r_refs = 1;
3713 if (newreq)
3714 *reqp = req;
3715 return (0);
3716 }
3717
3718 /*
3719 * Clean up and free an NFS request structure.
 *
 * Does nothing for a NULL request or one that is not R_INITTED.
 * Otherwise the request is taken off every queue it may still be on
 * (request queue, async I/O queue, resend queue, cwnd queue), its
 * congestion window and jukebox-timeout accounting on the mount is
 * undone, and its mbufs, credential, GSS state, r_wrongsec buffer,
 * mount reference and mutex are released.  The structure itself is
 * freed only if it carries R_ALLOCATED.
3720 */
3721 void
3722 nfs_request_destroy(struct nfsreq *req)
3723 {
3724 struct nfsmount *nmp;
3725 struct gss_seq *gsp, *ngsp;
3726 int clearjbtimeo = 0;
3727
3728 if (!req || !(req->r_flags & R_INITTED))
3729 return;
3730 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
/* clearing R_INITTED makes a repeated destroy of this request a no-op */
3731 req->r_flags &= ~R_INITTED;
3732 if (req->r_lflags & RL_QUEUED)
3733 nfs_reqdequeue(req);
3734
3735 if (req->r_achain.tqe_next != NFSREQNOLIST) {
3736 /*
3737 * Still on an async I/O queue?
3738 * %%% But which one, we may be on a local iod.
3739 */
3740 lck_mtx_lock(nfsiod_mutex);
/* re-check under nfsiod_mutex; the first test was made without the lock */
3741 if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) {
3742 TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
3743 req->r_achain.tqe_next = NFSREQNOLIST;
3744 }
3745 lck_mtx_unlock(nfsiod_mutex);
3746 }
3747
3748 lck_mtx_lock(&req->r_mtx);
3749 if (nmp) {
3750 lck_mtx_lock(&nmp->nm_lock);
3751 if (req->r_flags & R_CWND) {
3752 /* Decrement the outstanding request count. */
3753 req->r_flags &= ~R_CWND;
3754 nmp->nm_sent -= NFS_CWNDSCALE;
3755 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3756 /* congestion window is open, poke the cwnd queue */
3757 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3758 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3759 req2->r_cchain.tqe_next = NFSREQNOLIST;
3760 wakeup(req2);
3761 }
3762 }
3763 assert((req->r_flags & R_RESENDQ) == 0);
3764 /* XXX should we just remove this conditional, we should have a reference if we're resending */
3765 if (req->r_rchain.tqe_next != NFSREQNOLIST) {
3766 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
3767 req->r_rchain.tqe_next = NFSREQNOLIST;
3768 if (req->r_flags & R_RESENDQ)
3769 req->r_flags &= ~R_RESENDQ;
3770 }
3771 if (req->r_cchain.tqe_next != NFSREQNOLIST) {
3772 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
3773 req->r_cchain.tqe_next = NFSREQNOLIST;
3774 }
3775 if (req->r_flags & R_JBTPRINTFMSG) {
3776 req->r_flags &= ~R_JBTPRINTFMSG;
3777 nmp->nm_jbreqs--;
/* when the mount's nm_jbreqs count drops to zero, clear the jukebox timeout state below */
3778 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
3779 }
3780 lck_mtx_unlock(&nmp->nm_lock);
3781 }
3782 lck_mtx_unlock(&req->r_mtx);
3783
3784 if (clearjbtimeo)
3785 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
/* free r_mhead if set, otherwise r_mrest; NOTE(review): presumably r_mhead's chain includes r_mrest once the header has been added - confirm */
3786 if (req->r_mhead)
3787 mbuf_freem(req->r_mhead);
3788 else if (req->r_mrest)
3789 mbuf_freem(req->r_mrest);
3790 if (req->r_nmrep.nmc_mhead)
3791 mbuf_freem(req->r_nmrep.nmc_mhead);
3792 if (IS_VALID_CRED(req->r_cred))
3793 kauth_cred_unref(&req->r_cred);
3794 if (nfs_request_using_gss(req))
3795 nfs_gss_clnt_rpcdone(req);
3796 SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
3797 FREE(gsp, M_TEMP);
3798 if (req->r_gss_ctx)
3799 nfs_gss_clnt_ctx_unref(req);
3800 if (req->r_wrongsec)
3801 FREE(req->r_wrongsec, M_TEMP);
3802 if (nmp)
3803 nfs_mount_rele(nmp);
3804 lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
3805 if (req->r_flags & R_ALLOCATED)
3806 FREE_ZONE(req, sizeof(*req), M_NFSREQ);
3807 }
3808
3809 void
3810 nfs_request_ref(struct nfsreq *req, int locked)
3811 {
3812 if (!locked)
3813 lck_mtx_lock(&req->r_mtx);
3814 if (req->r_refs <= 0)
3815 panic("nfsreq reference error");
3816 req->r_refs++;
3817 if (!locked)
3818 lck_mtx_unlock(&req->r_mtx);
3819 }
3820
3821 void
3822 nfs_request_rele(struct nfsreq *req)
3823 {
3824 int destroy;
3825
3826 lck_mtx_lock(&req->r_mtx);
3827 if (req->r_refs <= 0)
3828 panic("nfsreq reference underflow");
3829 req->r_refs--;
3830 destroy = (req->r_refs == 0);
3831 lck_mtx_unlock(&req->r_mtx);
3832 if (destroy)
3833 nfs_request_destroy(req);
3834 }
3835
3836
3837 /*
3838 * Add an (updated) RPC header with authorization to an NFS request.
3839 */
3840 int
3841 nfs_request_add_header(struct nfsreq *req)
3842 {
3843 struct nfsmount *nmp;
3844 int error = 0;
3845 mbuf_t m;
3846
3847 /* free up any previous header */
3848 if ((m = req->r_mhead)) {
3849 while (m && (m != req->r_mrest))
3850 m = mbuf_free(m);
3851 req->r_mhead = NULL;
3852 }
3853
3854 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3855 if (nfs_mount_gone(nmp))
3856 return (ENXIO);
3857
3858 error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
3859 if (error)
3860 return (error);
3861
3862 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
3863 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3864 if (nfs_mount_gone(nmp))
3865 return (ENXIO);
3866 lck_mtx_lock(&nmp->nm_lock);
3867 if (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT))
3868 req->r_retry = nmp->nm_retry;
3869 else
3870 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
3871 lck_mtx_unlock(&nmp->nm_lock);
3872
3873 return (error);
3874 }
3875
3876
3877 /*
3878 * Queue an NFS request up and send it out.
3879 */
3880 int
3881 nfs_request_send(struct nfsreq *req, int wait)
3882 {
3883 struct nfsmount *nmp;
3884 struct timeval now;
3885
3886 lck_mtx_lock(&req->r_mtx);
3887 req->r_flags |= R_SENDING;
3888 lck_mtx_unlock(&req->r_mtx);
3889
3890 lck_mtx_lock(nfs_request_mutex);
3891
3892 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3893 if (nfs_mount_gone(nmp)) {
3894 lck_mtx_unlock(nfs_request_mutex);
3895 return (ENXIO);
3896 }
3897
3898 microuptime(&now);
3899 if (!req->r_start) {
3900 req->r_start = now.tv_sec;
3901 req->r_lastmsg = now.tv_sec -
3902 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
3903 }
3904
3905 OSAddAtomic64(1, &nfsstats.rpcrequests);
3906
3907 /*
3908 * Chain request into list of outstanding requests. Be sure
3909 * to put it LAST so timer finds oldest requests first.
3910 * Make sure that the request queue timer is running
3911 * to check for possible request timeout.
3912 */
3913 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
3914 req->r_lflags |= RL_QUEUED;
3915 if (!nfs_request_timer_on) {
3916 nfs_request_timer_on = 1;
3917 nfs_interval_timer_start(nfs_request_timer_call,
3918 NFS_REQUESTDELAY);
3919 }
3920 lck_mtx_unlock(nfs_request_mutex);
3921
3922 /* Send the request... */
3923 return (nfs_send(req, wait));
3924 }
3925
3926 /*
3927 * Call nfs_wait_reply() to wait for the reply.
3928 */
3929 void
3930 nfs_request_wait(struct nfsreq *req)
3931 {
3932 req->r_error = nfs_wait_reply(req);
3933 }
3934
3935 /*
3936 * Finish up an NFS request by dequeueing it and
3937 * doing the initial NFS request reply processing.
3938 */
3939 int
3940 nfs_request_finish(
3941 struct nfsreq *req,
3942 struct nfsm_chain *nmrepp,
3943 int *status)
3944 {
3945 struct nfsmount *nmp;
3946 mbuf_t mrep;
3947 int verf_type = 0;
3948 uint32_t verf_len = 0;
3949 uint32_t reply_status = 0;
3950 uint32_t rejected_status = 0;
3951 uint32_t auth_status = 0;
3952 uint32_t accepted_status = 0;
3953 struct nfsm_chain nmrep;
3954 int error, clearjbtimeo;
3955
3956 error = req->r_error;
3957
3958 if (nmrepp)
3959 nmrepp->nmc_mhead = NULL;
3960
3961 /* RPC done, unlink the request. */
3962 nfs_reqdequeue(req);
3963
3964 mrep = req->r_nmrep.nmc_mhead;
3965
3966 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3967
3968 if ((req->r_flags & R_CWND) && nmp) {
3969 /*
3970 * Decrement the outstanding request count.
3971 */
3972 req->r_flags &= ~R_CWND;
3973 lck_mtx_lock(&nmp->nm_lock);
3974 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3975 nmp->nm_sent -= NFS_CWNDSCALE;
3976 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3977 /* congestion window is open, poke the cwnd queue */
3978 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3979 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3980 req2->r_cchain.tqe_next = NFSREQNOLIST;
3981 wakeup(req2);
3982 }
3983 lck_mtx_unlock(&nmp->nm_lock);
3984 }
3985
3986 if (nfs_request_using_gss(req)) {
3987 /*
3988 * If the request used an RPCSEC_GSS credential
3989 * then reset its sequence number bit in the
3990 * request window.
3991 */
3992 nfs_gss_clnt_rpcdone(req);
3993
3994 /*
3995 * If we need to re-send, go back and re-build the
3996 * request based on a new sequence number.
3997 * Note that we're using the original XID.
3998 */
3999 if (error == EAGAIN) {
4000 req->r_error = 0;
4001 if (mrep)
4002 mbuf_freem(mrep);
4003 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
4004 req->r_nmrep.nmc_mhead = NULL;
4005 req->r_flags |= R_RESTART;
4006 if (error == ENEEDAUTH) {
4007 req->r_xid = 0; // get a new XID
4008 error = 0;
4009 }
4010 goto nfsmout;
4011 }
4012 }
4013
4014 /*
4015 * If there was a successful reply, make sure to mark the mount as up.
4016 * If a tprintf message was given (or if this is a timed-out soft mount)
4017 * then post a tprintf message indicating the server is alive again.
4018 */
4019 if (!error) {
4020 if ((req->r_flags & R_TPRINTFMSG) ||
4021 (nmp && (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) &&
4022 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE|NFSSTA_DEAD)) == NFSSTA_TIMEO)))
4023 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
4024 else
4025 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
4026 }
4027 if (!error && !nmp)
4028 error = ENXIO;
4029 nfsmout_if(error);
4030
4031 /*
4032 * break down the RPC header and check if ok
4033 */
4034 nmrep = req->r_nmrep;
4035 nfsm_chain_get_32(error, &nmrep, reply_status);
4036 nfsmout_if(error);
4037 if (reply_status == RPC_MSGDENIED) {
4038 nfsm_chain_get_32(error, &nmrep, rejected_status);
4039 nfsmout_if(error);
4040 if (rejected_status == RPC_MISMATCH) {
4041 error = ENOTSUP;
4042 goto nfsmout;
4043 }
4044 nfsm_chain_get_32(error, &nmrep, auth_status);
4045 nfsmout_if(error);
4046 switch (auth_status) {
4047 case RPCSEC_GSS_CREDPROBLEM:
4048 case RPCSEC_GSS_CTXPROBLEM:
4049 /*
4050 * An RPCSEC_GSS cred or context problem.
4051 * We can't use it anymore.
4052 * Restore the args, renew the context
4053 * and set up for a resend.
4054 */
4055 error = nfs_gss_clnt_args_restore(req);
4056 if (error && error != ENEEDAUTH)
4057 break;
4058
4059 if (!error) {
4060 error = nfs_gss_clnt_ctx_renew(req);
4061 if (error)
4062 break;
4063 }
4064 mbuf_freem(mrep);
4065 req->r_nmrep.nmc_mhead = NULL;
4066 req->r_xid = 0; // get a new XID
4067 req->r_flags |= R_RESTART;
4068 goto nfsmout;
4069 default:
4070 error = EACCES;
4071 break;
4072 }
4073 goto nfsmout;
4074 }
4075
4076 /* Now check the verifier */
4077 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
4078 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
4079 nfsmout_if(error);
4080
4081 switch (req->r_auth) {
4082 case RPCAUTH_NONE:
4083 case RPCAUTH_SYS:
4084 /* Any AUTH_SYS verifier is ignored */
4085 if (verf_len > 0)
4086 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
4087 nfsm_chain_get_32(error, &nmrep, accepted_status);
4088 break;
4089 case RPCAUTH_KRB5:
4090 case RPCAUTH_KRB5I:
4091 case RPCAUTH_KRB5P:
4092 error = nfs_gss_clnt_verf_get(req, &nmrep,
4093 verf_type, verf_len, &accepted_status);
4094 break;
4095 }
4096 nfsmout_if(error);
4097
4098 switch (accepted_status) {
4099 case RPC_SUCCESS:
4100 if (req->r_procnum == NFSPROC_NULL) {
4101 /*
4102 * The NFS null procedure is unique,
4103 * in not returning an NFS status.
4104 */
4105 *status = NFS_OK;
4106 } else {
4107 nfsm_chain_get_32(error, &nmrep, *status);
4108 nfsmout_if(error);
4109 }
4110
4111 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
4112 /*
4113 * It's a JUKEBOX error - delay and try again
4114 */
4115 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
4116
4117 mbuf_freem(mrep);
4118 req->r_nmrep.nmc_mhead = NULL;
4119 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
4120 /* we're not yet completely mounted and */
4121 /* we can't complete an RPC, so we fail */
4122 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4123 nfs_softterm(req);
4124 error = req->r_error;
4125 goto nfsmout;
4126 }
4127 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
4128 if (req->r_delay > 30)
4129 req->r_delay = 30;
4130 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
4131 if (!(req->r_flags & R_JBTPRINTFMSG)) {
4132 req->r_flags |= R_JBTPRINTFMSG;
4133 lck_mtx_lock(&nmp->nm_lock);
4134 nmp->nm_jbreqs++;
4135 lck_mtx_unlock(&nmp->nm_lock);
4136 }
4137 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
4138 "resource temporarily unavailable (jukebox)", 0);
4139 }
4140 if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (req->r_delay == 30) &&
4141 !(req->r_flags & R_NOINTR)) {
4142 /* for soft mounts, just give up after a short while */
4143 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4144 nfs_softterm(req);
4145 error = req->r_error;
4146 goto nfsmout;
4147 }
4148 delay = req->r_delay;
4149 if (req->r_callback.rcb_func) {
4150 struct timeval now;
4151 microuptime(&now);
4152 req->r_resendtime = now.tv_sec + delay;
4153 } else {
4154 do {
4155 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
4156 goto nfsmout;
4157 tsleep(nfs_request_finish, PSOCK|slpflag, "nfs_jukebox_trylater", hz);
4158 slpflag = 0;
4159 } while (--delay > 0);
4160 }
4161 req->r_xid = 0; // get a new XID
4162 req->r_flags |= R_RESTART;
4163 req->r_start = 0;
4164 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
4165 return (0);
4166 }
4167
4168 if (req->r_flags & R_JBTPRINTFMSG) {
4169 req->r_flags &= ~R_JBTPRINTFMSG;
4170 lck_mtx_lock(&nmp->nm_lock);
4171 nmp->nm_jbreqs--;
4172 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4173 lck_mtx_unlock(&nmp->nm_lock);
4174 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
4175 }
4176
4177 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
4178 /*
4179 * Hmmm... we need to try a different security flavor.
4180 * The first time a request hits this, we will allocate an array
4181 * to track flavors to try. We fill the array with the mount's
4182 * preferred flavors or the server's preferred flavors or just the
4183 * flavors we support.
4184 */
4185 uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
4186 int srvcount, i, j;
4187
4188 /* Call SECINFO to try to get list of flavors from server. */
4189 srvcount = NX_MAX_SEC_FLAVORS;
4190 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);
4191
4192 if (!req->r_wrongsec) {
4193 /* first time... set up flavor array */
4194 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK);
4195 if (!req->r_wrongsec) {
4196 error = EACCES;
4197 goto nfsmout;
4198 }
4199 i=0;
4200 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
4201 for(; i < nmp->nm_sec.count; i++)
4202 req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
4203 } else if (srvcount) { /* otherwise use the server's list of flavors */
4204 for(; i < srvcount; i++)
4205 req->r_wrongsec[i] = srvflavors[i];
4206 } else { /* otherwise, just try the flavors we support. */
4207 req->r_wrongsec[i++] = RPCAUTH_KRB5P;
4208 req->r_wrongsec[i++] = RPCAUTH_KRB5I;
4209 req->r_wrongsec[i++] = RPCAUTH_KRB5;
4210 req->r_wrongsec[i++] = RPCAUTH_SYS;
4211 req->r_wrongsec[i++] = RPCAUTH_NONE;
4212 }
4213 for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */
4214 req->r_wrongsec[i] = RPCAUTH_INVALID;
4215 }
4216
4217 /* clear the current flavor from the list */
4218 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
4219 if (req->r_wrongsec[i] == req->r_auth)
4220 req->r_wrongsec[i] = RPCAUTH_INVALID;
4221
4222 /* find the next flavor to try */
4223 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
4224 if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
4225 if (!srvcount) /* no server list, just try it */
4226 break;
4227 /* check that it's in the server's list */
4228 for(j=0; j < srvcount; j++)
4229 if (req->r_wrongsec[i] == srvflavors[j])
4230 break;
4231 if (j < srvcount) /* found */
4232 break;
4233 /* not found in server list */
4234 req->r_wrongsec[i] = RPCAUTH_INVALID;
4235 }
4236 if (i == NX_MAX_SEC_FLAVORS) {
4237 /* nothing left to try! */
4238 error = EACCES;
4239 goto nfsmout;
4240 }
4241
4242 /* retry with the next auth flavor */
4243 req->r_auth = req->r_wrongsec[i];
4244 req->r_xid = 0; // get a new XID
4245 req->r_flags |= R_RESTART;
4246 req->r_start = 0;
4247 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
4248 return (0);
4249 }
4250 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
4251 /*
4252 * We renegotiated security for this request; so update the
4253 * default security flavor for the associated node.
4254 */
4255 if (req->r_np)
4256 req->r_np->n_auth = req->r_auth;
4257 }
4258
4259 if (*status == NFS_OK) {
4260 /*
4261 * Successful NFS request
4262 */
4263 *nmrepp = nmrep;
4264 req->r_nmrep.nmc_mhead = NULL;
4265 break;
4266 }
4267 /* Got an NFS error of some kind */
4268
4269 /*
4270 * If the File Handle was stale, invalidate the
4271 * lookup cache, just in case.
4272 */
4273 if ((*status == ESTALE) && req->r_np) {
4274 cache_purge(NFSTOV(req->r_np));
4275 /* if monitored, also send delete event */
4276 if (vnode_ismonitored(NFSTOV(req->r_np)))
4277 nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
4278 }
4279 if (nmp->nm_vers == NFS_VER2)
4280 mbuf_freem(mrep);
4281 else
4282 *nmrepp = nmrep;
4283 req->r_nmrep.nmc_mhead = NULL;
4284 error = 0;
4285 break;
4286 case RPC_PROGUNAVAIL:
4287 error = EPROGUNAVAIL;
4288 break;
4289 case RPC_PROGMISMATCH:
4290 error = ERPCMISMATCH;
4291 break;
4292 case RPC_PROCUNAVAIL:
4293 error = EPROCUNAVAIL;
4294 break;
4295 case RPC_GARBAGE:
4296 error = EBADRPC;
4297 break;
4298 case RPC_SYSTEM_ERR:
4299 default:
4300 error = EIO;
4301 break;
4302 }
4303 nfsmout:
4304 if (req->r_flags & R_JBTPRINTFMSG) {
4305 req->r_flags &= ~R_JBTPRINTFMSG;
4306 lck_mtx_lock(&nmp->nm_lock);
4307 nmp->nm_jbreqs--;
4308 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4309 lck_mtx_unlock(&nmp->nm_lock);
4310 if (clearjbtimeo)
4311 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
4312 }
4313 FSDBG(273, R_XID32(req->r_xid), nmp, req,
4314 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
4315 return (error);
4316 }
4317
4318 /*
4319 * NFS request using a GSS/Kerberos security flavor?
4320 */
4321 int
4322 nfs_request_using_gss(struct nfsreq *req)
4323 {
4324 if (!req->r_gss_ctx)
4325 return (0);
4326 switch (req->r_auth) {
4327 case RPCAUTH_KRB5:
4328 case RPCAUTH_KRB5I:
4329 case RPCAUTH_KRB5P:
4330 return (1);
4331 }
4332 return (0);
4333 }
4334
4335 /*
4336 * Perform an NFS request synchronously.
4337 */
4338
4339 int
4340 nfs_request(
4341 nfsnode_t np,
4342 mount_t mp, /* used only if !np */
4343 struct nfsm_chain *nmrest,
4344 int procnum,
4345 vfs_context_t ctx,
4346 struct nfsreq_secinfo_args *si,
4347 struct nfsm_chain *nmrepp,
4348 u_int64_t *xidp,
4349 int *status)
4350 {
4351 return nfs_request2(np, mp, nmrest, procnum,
4352 vfs_context_thread(ctx), vfs_context_ucred(ctx),
4353 si, 0, nmrepp, xidp, status);
4354 }
4355
4356 int
4357 nfs_request2(
4358 nfsnode_t np,
4359 mount_t mp, /* used only if !np */
4360 struct nfsm_chain *nmrest,
4361 int procnum,
4362 thread_t thd,
4363 kauth_cred_t cred,
4364 struct nfsreq_secinfo_args *si,
4365 int flags,
4366 struct nfsm_chain *nmrepp,
4367 u_int64_t *xidp,
4368 int *status)
4369 {
4370 struct nfsreq rq, *req = &rq;
4371 int error;
4372
4373 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
4374 return (error);
4375 req->r_flags |= (flags & (R_OPTMASK | R_SOFT));
4376 if (si)
4377 req->r_secinfo = *si;
4378
4379 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
4380 do {
4381 req->r_error = 0;
4382 req->r_flags &= ~R_RESTART;
4383 if ((error = nfs_request_add_header(req)))
4384 break;
4385 if (xidp)
4386 *xidp = req->r_xid;
4387 if ((error = nfs_request_send(req, 1)))
4388 break;
4389 nfs_request_wait(req);
4390 if ((error = nfs_request_finish(req, nmrepp, status)))
4391 break;
4392 } while (req->r_flags & R_RESTART);
4393
4394 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
4395 nfs_request_rele(req);
4396 return (error);
4397 }
4398
4399
4400 /*
4401 * Set up a new null proc request to exchange GSS context tokens with the
4402 * server. Associate the context that we are setting up with the request that we
4403 * are sending.
4404 */
4405
4406 int
4407 nfs_request_gss(
4408 mount_t mp,
4409 struct nfsm_chain *nmrest,
4410 thread_t thd,
4411 kauth_cred_t cred,
4412 int flags,
4413 struct nfs_gss_clnt_ctx *cp, /* Set to gss context to renew or setup */
4414 struct nfsm_chain *nmrepp,
4415 int *status)
4416 {
4417 struct nfsreq rq, *req = &rq;
4418 int error, wait = 1;
4419
4420 if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
4421 return (error);
4422 req->r_flags |= (flags & R_OPTMASK);
4423
4424 if (cp == NULL) {
4425 printf("nfs_request_gss request has no context\n");
4426 nfs_request_rele(req);
4427 return (NFSERR_EAUTH);
4428 }
4429 nfs_gss_clnt_ctx_ref(req, cp);
4430
4431 /*
4432 * Don't wait for a reply to a context destroy advisory
4433 * to avoid hanging on a dead server.
4434 */
4435 if (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY)
4436 wait = 0;
4437
4438 FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
4439 do {
4440 req->r_error = 0;
4441 req->r_flags &= ~R_RESTART;
4442 if ((error = nfs_request_add_header(req)))
4443 break;
4444
4445 if ((error = nfs_request_send(req, wait)))
4446 break;
4447 if (!wait)
4448 break;
4449
4450 nfs_request_wait(req);
4451 if ((error = nfs_request_finish(req, nmrepp, status)))
4452 break;
4453 } while (req->r_flags & R_RESTART);
4454
4455 FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
4456
4457 nfs_gss_clnt_ctx_unref(req);
4458 nfs_request_rele(req);
4459
4460 return (error);
4461 }
4462
/*
 * Create and start an asynchronous NFS request.
 *
 * The request is created through nfs_request_create() into *reqp, flagged
 * R_ASYNC, given the optional secinfo args and callback info, and sent.
 * When a callback function is set, an extra reference is taken before the
 * send and this function does not return until the request has been marked
 * R_SENT (or an error/interrupt occurs).
 *
 * Returns 0 on success or an errno.  On error the request is released here.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	/* read back *reqp even on failure; it is only dereferenced if non-NULL */
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	req->r_flags |= R_ASYNC;
	if (si)
		req->r_secinfo = *si;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		/* extra reference for callback-driven requests, taken before sending */
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				nmp = req->r_nmp;
				if ((req->r_flags & R_RESENDQ) && !nfs_mount_gone(nmp)) {
					lck_mtx_lock(&nmp->nm_lock);
					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
						/*
						 * It's not going to get off the resend queue if we're in recovery.
						 * So, just take it off ourselves.  We could be holding mount state
						 * busy and thus holding up the start of recovery.
						 */
						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
						req->r_rchain.tqe_next = NFSREQNOLIST;
						if (req->r_flags & R_RESENDQ)
							req->r_flags &= ~R_RESENDQ;
						lck_mtx_unlock(&nmp->nm_lock);
						req->r_flags |= R_SENDING;
						/* r_mtx is dropped across the send and re-taken afterwards */
						lck_mtx_unlock(&req->r_mtx);
						error = nfs_send(req, 1);
						/* Remove the R_RESENDQ reference */
						nfs_request_rele(req);
						lck_mtx_lock(&req->r_mtx);
						if (error)
							break;
						continue;
					}
					lck_mtx_unlock(&nmp->nm_lock);
				}
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				/* sleep on the request with a 2 second timeout, then re-check R_SENT */
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				/* PCATCH (if any) applies to the first sleep only */
				slpflag = 0;
			}
		}
		/* sample R_SENT while still holding r_mtx */
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		/*
		 * NOTE(review): presumably this drops the extra callback reference
		 * taken above when the request failed before being sent -- confirm.
		 */
		if (error && req->r_callback.rcb_func && !sent) {
			nfs_request_rele(req);
		}
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	/*
	 * On error, or for callback requests, release a reference here.
	 * Requests without a callback that were started successfully keep
	 * their reference.
	 */
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);

	return (error);
}
4550
/*
 * Wait for and finish an asynchronous NFS request.
 *
 * First waits for the request to come off the mount's resend queue, then
 * waits for the reply and runs nfs_request_finish() on it.  If the finish
 * step asks for a restart (R_RESTART) the request is rebuilt and resent:
 * for callback-driven (async I/O) requests this returns EINPROGRESS without
 * releasing the request; otherwise the resend is waited for here.
 *
 * If xidp is non-NULL it receives the request's XID.  On every return path
 * other than EINPROGRESS a reference on the request is released.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	/* asyncio: the request has a completion callback function set */
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
	struct nfsmount *nmp;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };

		if ((nmp = req->r_nmp)) {
			lck_mtx_lock(&nmp->nm_lock);
			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
				/*
				 * It's not going to get off the resend queue if we're in recovery.
				 * So, just take it off ourselves.  We could be holding mount state
				 * busy and thus holding up the start of recovery.
				 */
				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
				req->r_rchain.tqe_next = NFSREQNOLIST;
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				/* Remove the R_RESENDQ reference */
				/* decremented directly: r_mtx is already held, so nfs_request_rele() can't be called here */
				assert(req->r_refs > 0);
				req->r_refs--;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		/* sleep on the request with a 2 second timeout, then re-check R_RESENDQ */
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio) {
			assert(req->r_achain.tqe_next == NFSREQNOLIST);
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_IOD;
			if (req->r_resendtime) {  /* send later */
				nfs_asyncio_resend(req);
				lck_mtx_unlock(&req->r_mtx);
				return (EINPROGRESS);
			}
			lck_mtx_unlock(&req->r_mtx);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		/* only wait in the send itself when this is not callback-driven I/O */
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		/* async I/O requests are left in flight; EINPROGRESS tells the caller so */
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
4632
4633 /*
4634 * Cancel a pending asynchronous NFS request.
4635 */
4636 void
4637 nfs_request_async_cancel(struct nfsreq *req)
4638 {
4639 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
4640 nfs_request_rele(req);
4641 }
4642
4643 /*
4644 * Flag a request as being terminated.
4645 */
4646 void
4647 nfs_softterm(struct nfsreq *req)
4648 {
4649 struct nfsmount *nmp = req->r_nmp;
4650 req->r_flags |= R_SOFTTERM;
4651 req->r_error = ETIMEDOUT;
4652 if (!(req->r_flags & R_CWND) || nfs_mount_gone(nmp))
4653 return;
4654 /* update congestion window */
4655 req->r_flags &= ~R_CWND;
4656 lck_mtx_lock(&nmp->nm_lock);
4657 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
4658 nmp->nm_sent -= NFS_CWNDSCALE;
4659 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
4660 /* congestion window is open, poke the cwnd queue */
4661 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
4662 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
4663 req2->r_cchain.tqe_next = NFSREQNOLIST;
4664 wakeup(req2);
4665 }
4666 lck_mtx_unlock(&nmp->nm_lock);
4667 }
4668
4669 /*
4670 * Ensure req isn't in use by the timer, then dequeue it.
4671 */
4672 void
4673 nfs_reqdequeue(struct nfsreq *req)
4674 {
4675 lck_mtx_lock(nfs_request_mutex);
4676 while (req->r_lflags & RL_BUSY) {
4677 req->r_lflags |= RL_WAITING;
4678 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
4679 }
4680 if (req->r_lflags & RL_QUEUED) {
4681 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
4682 req->r_lflags &= ~RL_QUEUED;
4683 }
4684 lck_mtx_unlock(nfs_request_mutex);
4685 }
4686
4687 /*
4688 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
4689 * free()'d out from under it.
4690 */
4691 void
4692 nfs_reqbusy(struct nfsreq *req)
4693 {
4694 if (req->r_lflags & RL_BUSY)
4695 panic("req locked");
4696 req->r_lflags |= RL_BUSY;
4697 }
4698
4699 /*
4700 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
4701 */
4702 struct nfsreq *
4703 nfs_reqnext(struct nfsreq *req)
4704 {
4705 struct nfsreq * nextreq;
4706
4707 if (req == NULL)
4708 return (NULL);
4709 /*
4710 * We need to get and busy the next req before signalling the
4711 * current one, otherwise wakeup() may block us and we'll race to
4712 * grab the next req.
4713 */
4714 nextreq = TAILQ_NEXT(req, r_chain);
4715 if (nextreq != NULL)
4716 nfs_reqbusy(nextreq);
4717 /* unbusy and signal. */
4718 req->r_lflags &= ~RL_BUSY;
4719 if (req->r_lflags & RL_WAITING) {
4720 req->r_lflags &= ~RL_WAITING;
4721 wakeup(&req->r_lflags);
4722 }
4723 return (nextreq);
4724 }
4725
4726 /*
4727 * NFS request queue timer routine
4728 *
4729 * Scan the NFS request queue for any requests that have timed out.
4730 *
4731 * Alert the system of unresponsive servers.
4732 * Mark expired requests on soft mounts as terminated.
4733 * For UDP, mark/signal requests for retransmission.
4734 */
4735 void
4736 nfs_request_timer(__unused void *param0, __unused void *param1)
4737 {
4738 struct nfsreq *req;
4739 struct nfsmount *nmp;
4740 int timeo, maxtime, finish_asyncio, error;
4741 struct timeval now;
4742 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
4743
4744 restart:
4745 lck_mtx_lock(nfs_request_mutex);
4746 req = TAILQ_FIRST(&nfs_reqq);
4747 if (req == NULL) { /* no requests - turn timer off */
4748 nfs_request_timer_on = 0;
4749 lck_mtx_unlock(nfs_request_mutex);
4750 return;
4751 }
4752
4753 nfs_reqbusy(req);
4754 TAILQ_INIT(&nfs_mount_poke_queue);
4755
4756 microuptime(&now);
4757 for ( ; req != NULL ; req = nfs_reqnext(req)) {
4758 nmp = req->r_nmp;
4759 if (nmp == NULL) {
4760 NFS_SOCK_DBG("Found a request with out a mount!\n");
4761 continue;
4762 }
4763 if (req->r_error || req->r_nmrep.nmc_mhead)
4764 continue;
4765 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
4766 if (req->r_callback.rcb_func != NULL) {
4767 /* async I/O RPC needs to be finished */
4768 lck_mtx_lock(&req->r_mtx);
4769 req->r_error = error;
4770 finish_asyncio = !(req->r_flags & R_WAITSENT);
4771 wakeup(req);
4772 lck_mtx_unlock(&req->r_mtx);
4773 if (finish_asyncio)
4774 nfs_asyncio_finish(req);
4775 }
4776 continue;
4777 }
4778
4779 lck_mtx_lock(&req->r_mtx);
4780
4781 if (nmp->nm_tprintf_initial_delay &&
4782 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
4783 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
4784 req->r_lastmsg = now.tv_sec;
4785 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
4786 "not responding", 1);
4787 req->r_flags |= R_TPRINTFMSG;
4788 lck_mtx_lock(&nmp->nm_lock);
4789 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
4790 lck_mtx_unlock(&nmp->nm_lock);
4791 /* we're not yet completely mounted and */
4792 /* we can't complete an RPC, so we fail */
4793 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4794 nfs_softterm(req);
4795 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
4796 wakeup(req);
4797 lck_mtx_unlock(&req->r_mtx);
4798 if (finish_asyncio)
4799 nfs_asyncio_finish(req);
4800 continue;
4801 }
4802 lck_mtx_unlock(&nmp->nm_lock);
4803 }
4804
4805 /*
4806 * Put a reasonable limit on the maximum timeout,
4807 * and reduce that limit when soft mounts get timeouts or are in reconnect.
4808 */
4809 if (!(NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && !nfs_can_squish(nmp))
4810 maxtime = NFS_MAXTIMEO;
4811 else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
4812 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
4813 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
4814 else
4815 maxtime = NFS_MINTIMEO/4;
4816
4817 /*
4818 * Check for request timeout.
4819 */
4820 if (req->r_rtt >= 0) {
4821 req->r_rtt++;
4822 lck_mtx_lock(&nmp->nm_lock);
4823 if (req->r_flags & R_RESENDERR) {
4824 /* with resend errors, retry every few seconds */
4825 timeo = 4*hz;
4826 } else {
4827 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
4828 timeo = NFS_MINIDEMTIMEO; // gss context setup
4829 else if (NMFLAG(nmp, DUMBTIMER))
4830 timeo = nmp->nm_timeo;
4831 else
4832 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
4833
4834 /* ensure 62.5 ms floor */
4835 while (16 * timeo < hz)
4836 timeo *= 2;
4837 if (nmp->nm_timeouts > 0)
4838 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
4839 }
4840 /* limit timeout to max */
4841 if (timeo > maxtime)
4842 timeo = maxtime;
4843 if (req->r_rtt <= timeo) {
4844 NFS_SOCK_DBG("nfs timeout: req time %d and timeo is %d continue\n", req->r_rtt, timeo);
4845 lck_mtx_unlock(&nmp->nm_lock);
4846 lck_mtx_unlock(&req->r_mtx);
4847 continue;
4848 }
4849 /* The request has timed out */
4850 NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
4851 req->r_procnum, proct[req->r_procnum],
4852 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
4853 (now.tv_sec - req->r_start)*NFS_HZ, maxtime);
4854 if (nmp->nm_timeouts < 8)
4855 nmp->nm_timeouts++;
4856 if (nfs_mount_check_dead_timeout(nmp)) {
4857 /* Unbusy this request */
4858 req->r_lflags &= ~RL_BUSY;
4859 if (req->r_lflags & RL_WAITING) {
4860 req->r_lflags &= ~RL_WAITING;
4861 wakeup(&req->r_lflags);
4862 }
4863 lck_mtx_unlock(&req->r_mtx);
4864
4865 /* No need to poke this mount */
4866 if (nmp->nm_sockflags & NMSOCK_POKE) {
4867 nmp->nm_sockflags &= ~NMSOCK_POKE;
4868 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
4869 }
4870 /* Release our lock state, so we can become a zombie */
4871 lck_mtx_unlock(nfs_request_mutex);
4872
4873 /*
4874 * Note nfs_mount_make zombie(nmp) must be
4875 * called with nm_lock held. After doing some
4876 * work we release nm_lock in
4877 * nfs_make_mount_zombie with out acquiring any
4878 * other locks. (Later, in nfs_mount_zombie we
4879 * will acquire nfs_request_mutex, r_mtx,
4880 * nm_lock in that order). So we should not be
4881 * introducing deadlock here. We take a reference
4882 * on the mount so that its still there when we
4883 * release the lock.
4884 */
4885 nmp->nm_ref++;
4886 nfs_mount_make_zombie(nmp);
4887 lck_mtx_unlock(&nmp->nm_lock);
4888 nfs_mount_rele(nmp);
4889
4890 /*
4891 * All the request for this mount have now been
4892 * removed from the request queue. Restart to
4893 * process the remaining mounts
4894 */
4895 goto restart;
4896 }
4897
4898 /* if it's been a few seconds, try poking the socket */
4899 if ((nmp->nm_sotype == SOCK_STREAM) &&
4900 ((now.tv_sec - req->r_start) >= 3) &&
4901 !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
4902 (nmp->nm_sockflags & NMSOCK_READY)) {
4903 nmp->nm_sockflags |= NMSOCK_POKE;
4904 /*
4905 * We take a ref on the mount so that we know the mount will still be there
4906 * when we process the nfs_mount_poke_queue. An unmount request will block
4907 * in nfs_mount_drain_and_cleanup until after the poke is finished. We release
4908 * the reference after calling nfs_sock_poke below;
4909 */
4910 nmp->nm_ref++;
4911 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
4912 }
4913 lck_mtx_unlock(&nmp->nm_lock);
4914 }
4915
4916 /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
4917 if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER|R_SOFT))) &&
4918 ((req->r_rexmit >= req->r_retry) || /* too many */
4919 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
4920 OSAddAtomic64(1, &nfsstats.rpctimeouts);
4921 lck_mtx_lock(&nmp->nm_lock);
4922 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
4923 lck_mtx_unlock(&nmp->nm_lock);
4924 /* make sure we note the unresponsive server */
4925 /* (maxtime may be less than tprintf delay) */
4926 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
4927 "not responding", 1);
4928 req->r_lastmsg = now.tv_sec;
4929 req->r_flags |= R_TPRINTFMSG;
4930 } else {
4931 lck_mtx_unlock(&nmp->nm_lock);
4932 }
4933 if (req->r_flags & R_NOINTR) {
4934 /* don't terminate nointr requests on timeout */
4935 lck_mtx_unlock(&req->r_mtx);
4936 continue;
4937 }
4938 NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
4939 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
4940 now.tv_sec - req->r_start);
4941 nfs_softterm(req);
4942 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
4943 wakeup(req);
4944 lck_mtx_unlock(&req->r_mtx);
4945 if (finish_asyncio)
4946 nfs_asyncio_finish(req);
4947 continue;
4948 }
4949
4950 /* for TCP, only resend if explicitly requested */
4951 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
4952 if (++req->r_rexmit > NFS_MAXREXMIT)
4953 req->r_rexmit = NFS_MAXREXMIT;
4954 req->r_rtt = 0;
4955 lck_mtx_unlock(&req->r_mtx);
4956 continue;
4957 }
4958
4959 /*
4960 * The request needs to be (re)sent. Kick the requester to resend it.
4961 * (unless it's already marked as needing a resend)
4962 */
4963 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
4964 lck_mtx_unlock(&req->r_mtx);
4965 continue;
4966 }
4967 NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
4968 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
4969 req->r_flags |= R_MUSTRESEND;
4970 req->r_rtt = -1;
4971 wakeup(req);
4972 if ((req->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
4973 nfs_asyncio_resend(req);
4974 lck_mtx_unlock(&req->r_mtx);
4975 }
4976
4977 lck_mtx_unlock(nfs_request_mutex);
4978
4979 /* poke any sockets */
4980 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
4981 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
4982 nfs_sock_poke(nmp);
4983 nfs_mount_rele(nmp);
4984 }
4985
4986 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
4987 }
4988
4989 /*
4990 * check a thread's proc for the "noremotehang" flag.
4991 */
4992 int
4993 nfs_noremotehang(thread_t thd)
4994 {
4995 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
4996 return (p && proc_noremotehang(p));
4997 }
4998
4999 /*
5000 * Test for a termination condition pending on the process.
5001 * This is used to determine if we need to bail on a mount.
5002 * ETIMEDOUT is returned if there has been a soft timeout.
5003 * EINTR is returned if there is a signal pending that is not being ignored
5004 * and the mount is interruptable, or if we are a thread that is in the process
5005 * of cancellation (also SIGKILL posted).
5006 */
5007 extern int sigprop[NSIG+1];
5008 int
5009 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
5010 {
5011 proc_t p;
5012 int error = 0;
5013
5014 if (!nmp)
5015 return (ENXIO);
5016
5017 if (req && (req->r_flags & R_SOFTTERM))
5018 return (ETIMEDOUT); /* request has been terminated. */
5019 if (req && (req->r_flags & R_NOINTR))
5020 thd = NULL; /* don't check for signal on R_NOINTR */
5021
5022 if (!nmplocked)
5023 lck_mtx_lock(&nmp->nm_lock);
5024 if (nmp->nm_state & NFSSTA_FORCE) {
5025 /* If a force unmount is in progress then fail. */
5026 error = EIO;
5027 } else if (vfs_isforce(nmp->nm_mountp)) {
5028 /* Someone is unmounting us, go soft and mark it. */
5029 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
5030 nmp->nm_state |= NFSSTA_FORCE;
5031 }
5032
5033 /* Check if the mount is marked dead. */
5034 if (!error && (nmp->nm_state & NFSSTA_DEAD))
5035 error = ENXIO;
5036
5037 /*
5038 * If the mount is hung and we've requested not to hang
5039 * on remote filesystems, then bail now.
5040 */
5041 if (current_proc() != kernproc &&
5042 !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
5043 error = EIO;
5044
5045 if (!nmplocked)
5046 lck_mtx_unlock(&nmp->nm_lock);
5047 if (error)
5048 return (error);
5049
5050 /* may not have a thread for async I/O */
5051 if (thd == NULL || current_proc() == kernproc)
5052 return (0);
5053
5054 /*
5055 * Check if the process is aborted, but don't interrupt if we
5056 * were killed by a signal and this is the exiting thread which
5057 * is attempting to dump core.
5058 */
5059 if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
5060 (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
5061 (p->p_sigacts == NULL) ||
5062 (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
5063 !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
5064 return (EINTR);
5065
5066 /* mask off thread and process blocked signals. */
5067 if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
5068 proc_pendingsignals(p, NFSINT_SIGMASK))
5069 return (EINTR);
5070 return (0);
5071 }
5072
5073 /*
5074 * Lock a socket against others.
5075 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
5076 * and also to avoid race conditions between the processes with nfs requests
5077 * in progress when a reconnect is necessary.
5078 */
5079 int
5080 nfs_sndlock(struct nfsreq *req)
5081 {
5082 struct nfsmount *nmp = req->r_nmp;
5083 int *statep;
5084 int error = 0, slpflag = 0;
5085 struct timespec ts = { 0, 0 };
5086
5087 if (nfs_mount_gone(nmp))
5088 return (ENXIO);
5089
5090 lck_mtx_lock(&nmp->nm_lock);
5091 statep = &nmp->nm_state;
5092
5093 if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
5094 slpflag = PCATCH;
5095 while (*statep & NFSSTA_SNDLOCK) {
5096 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
5097 break;
5098 *statep |= NFSSTA_WANTSND;
5099 if (nfs_noremotehang(req->r_thread))
5100 ts.tv_sec = 1;
5101 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
5102 if (slpflag == PCATCH) {
5103 slpflag = 0;
5104 ts.tv_sec = 2;
5105 }
5106 }
5107 if (!error)
5108 *statep |= NFSSTA_SNDLOCK;
5109 lck_mtx_unlock(&nmp->nm_lock);
5110 return (error);
5111 }
5112
5113 /*
5114 * Unlock the stream socket for others.
5115 */
5116 void
5117 nfs_sndunlock(struct nfsreq *req)
5118 {
5119 struct nfsmount *nmp = req->r_nmp;
5120 int *statep, wake = 0;
5121
5122 if (!nmp)
5123 return;
5124 lck_mtx_lock(&nmp->nm_lock);
5125 statep = &nmp->nm_state;
5126 if ((*statep & NFSSTA_SNDLOCK) == 0)
5127 panic("nfs sndunlock");
5128 *statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
5129 if (*statep & NFSSTA_WANTSND) {
5130 *statep &= ~NFSSTA_WANTSND;
5131 wake = 1;
5132 }
5133 lck_mtx_unlock(&nmp->nm_lock);
5134 if (wake)
5135 wakeup(statep);
5136 }
5137
/*
 * Send one pre-built auxiliary RPC request and wait for the reply.
 *
 * nmp      - mount handed to nfs_sigintr() for the interruption checks
 * thd      - thread handed to nfs_sigintr() (not on the very first pass)
 * saddr    - destination address; also used as msg_name for an
 *            unconnected datagram socket
 * so       - socket to use, or NULL to create (and later close) a
 *            temporary one of type "sotype"
 * sotype   - SOCK_DGRAM or SOCK_STREAM
 * mreq     - the request mbuf chain; it is always freed before returning
 * xid      - transaction id the reply must carry
 * bindresv - when creating a socket, bind it using the low port range
 * timeo    - maximum number of send/receive passes; the socket timeouts
 *            are set to one second, so this is roughly a number of seconds
 * nmrep    - chain initialized over the received reply; on success it is
 *            positioned just past the RPC accept status
 *
 * The request is sent on the first pass.  For datagram sockets it is sent
 * again when the pass counter reaches "sendat", which doubles after every
 * send and is capped at 30.  For stream sockets it is sent only once.
 *
 * Returns 0 when the reply was accepted with RPC_SUCCESS, otherwise an
 * errno (socket errors, nfs_sigintr() errors, EBADRPC, ERPCMISMATCH,
 * EACCES, EPROGUNAVAIL, EPROGMISMATCH, EPROCUNAVAIL, EIO).  If every pass
 * ends in EWOULDBLOCK, that is the value returned.
 * NOTE(review): with timeo <= 0 the loop body never runs, so 0 is returned
 * without anything having been sent or parsed - confirm callers never do
 * this.
 */
5138 int
5139 nfs_aux_request(
5140 struct nfsmount *nmp,
5141 thread_t thd,
5142 struct sockaddr *saddr,
5143 socket_t so,
5144 int sotype,
5145 mbuf_t mreq,
5146 uint32_t xid,
5147 int bindresv,
5148 int timeo,
5149 struct nfsm_chain *nmrep)
5150 {
5151 int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
5152 socket_t newso = NULL;
5153 struct sockaddr_storage ss;
5154 struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
5155 mbuf_t m, mrep = NULL;
5156 struct msghdr msg;
5157 uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
5158 uint32_t verf_type, verf_len, accepted_status;
5159 size_t readlen, sentlen;
5160 struct nfs_rpc_record_state nrrs;
5161
5162 if (!so) {
5163 /* create socket and set options */
5164 soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
5165 if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
5166 goto nfsmout;
5167
5168 if (bindresv) {
/* select the low port range, then bind to the wildcard address with port 0 */
5169 int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
5170 int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
5171 int portrange = IP_PORTRANGE_LOW;
5172 error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
5173 nfsmout_if(error);
5174 ss.ss_len = saddr->sa_len;
5175 ss.ss_family = saddr->sa_family;
5176 if (ss.ss_family == AF_INET) {
5177 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
5178 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
5179 } else if (ss.ss_family == AF_INET6) {
5180 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
5181 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
5182 } else {
5183 error = EINVAL;
5184 }
5185 if (!error)
5186 error = sock_bind(newso, (struct sockaddr *)&ss);
5187 nfsmout_if(error);
5188 }
5189
5190 if (sotype == SOCK_STREAM) {
5191 # define NFS_AUX_CONNECTION_TIMEOUT 4 /* 4 second timeout for connections */
5192 int count = 0;
5193
/* start a non-blocking connect, then poll for completion one "tv" (1 second) at a time */
5194 error = sock_connect(newso, saddr, MSG_DONTWAIT);
5195 if (error == EINPROGRESS)
5196 error = 0;
5197 nfsmout_if(error);
5198
5199 while ((error = sock_connectwait(newso, &tv)) == EINPROGRESS) {
5200 /* After NFS_AUX_CONNECTION_TIMEOUT bail */
5201 if (++count >= NFS_AUX_CONNECTION_TIMEOUT) {
5202 error = ETIMEDOUT;
5203 break;
5204 }
5205 }
5206 nfsmout_if(error);
5207 }
/* one second send/receive timeouts on the temporary socket */
5208 if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
5209 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
5210 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
5211 goto nfsmout;
5212 so = newso;
5213 } else {
5214 /* make sure socket is using a one second timeout in this function */
5215 optlen = sizeof(orig_rcvto);
5216 error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
5217 if (!error) {
5218 optlen = sizeof(orig_sndto);
5219 error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
5220 }
/*
 * The timeouts are only overridden (and later restored) when both original
 * values could be read.  A failure to read them is not fatal: "error" is
 * overwritten by the nfs_sigintr() call at the top of the loop below.
 */
5221 if (!error) {
5222 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
5223 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
5224 restoreto = 1;
5225 }
5226 }
5227
5228 if (sotype == SOCK_STREAM) {
5229 sendat = 0; /* we only resend the request for UDP */
5230 nfs_rpc_record_state_init(&nrrs);
5231 }
5232
5233 for (try=0; try < timeo; try++) {
/* no thread is passed on the first pass (try == 0) */
5234 if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0)))
5235 break;
5236 if (!try || (try == sendat)) {
5237 /* send the request (resending periodically for UDP) */
/* a copy is sent each time so that mreq stays available for later resends */
5238 if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
5239 goto nfsmout;
5240 bzero(&msg, sizeof(msg));
5241 if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
5242 msg.msg_name = saddr;
5243 msg.msg_namelen = saddr->sa_len;
5244 }
5245 if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen)))
5246 goto nfsmout;
/* back off: double the pass number of the next resend, capped at 30 */
5247 sendat *= 2;
5248 if (sendat > 30)
5249 sendat = 30;
5250 }
5251 /* wait for the response */
5252 if (sotype == SOCK_STREAM) {
5253 /* try to read (more of) record */
5254 error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
5255 /* if we don't have the whole record yet, we'll keep trying */
5256 } else {
5257 readlen = 1<<18;
5258 bzero(&msg, sizeof(msg));
5259 error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
5260 }
/* receive timed out: go around for another pass */
5261 if (error == EWOULDBLOCK)
5262 continue;
5263 nfsmout_if(error);
5264 /* parse the response */
5265 nfsm_chain_dissect_init(error, nmrep, mrep);
5266 nfsm_chain_get_32(error, nmrep, rxid);
5267 nfsm_chain_get_32(error, nmrep, reply);
5268 nfsmout_if(error);
/* a reply with the wrong xid or message type is treated as a bad RPC, not skipped */
5269 if ((rxid != xid) || (reply != RPC_REPLY))
5270 error = EBADRPC;
5271 nfsm_chain_get_32(error, nmrep, reply_status);
5272 nfsmout_if(error);
5273 if (reply_status == RPC_MSGDENIED) {
5274 nfsm_chain_get_32(error, nmrep, rejected_status);
5275 nfsmout_if(error);
5276 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
5277 goto nfsmout;
5278 }
5279 nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
5280 nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
5281 nfsmout_if(error);
/* the verifier body is skipped, not checked */
5282 if (verf_len)
5283 nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
5284 nfsm_chain_get_32(error, nmrep, accepted_status);
5285 nfsmout_if(error);
/* map the RPC accept status to an errno */
5286 switch (accepted_status) {
5287 case RPC_SUCCESS:
5288 error = 0;
5289 break;
5290 case RPC_PROGUNAVAIL:
5291 error = EPROGUNAVAIL;
5292 break;
5293 case RPC_PROGMISMATCH:
5294 error = EPROGMISMATCH;
5295 break;
5296 case RPC_PROCUNAVAIL:
5297 error = EPROCUNAVAIL;
5298 break;
5299 case RPC_GARBAGE:
5300 error = EBADRPC;
5301 break;
5302 case RPC_SYSTEM_ERR:
5303 default:
5304 error = EIO;
5305 break;
5306 }
/* a reply was received and parsed: stop looping */
5307 break;
5308 }
5309 nfsmout:
/* put the caller's original socket timeouts back */
5310 if (restoreto) {
5311 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv));
5312 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv));
5313 }
/* tear down the temporary socket, if one was created here */
5314 if (newso) {
5315 sock_shutdown(newso, SHUT_RDWR);
5316 sock_close(newso);
5317 }
/* the request chain is consumed on every path */
5318 mbuf_freem(mreq);
5319 return (error);
5320 }
5321
/*
 * Ask the portmapper/rpcbind service at address "sa" which port serves
 * RPC program "protocol", version "vers", over transport "ipproto", and
 * store the answer back into "sa".
 *
 * nmp     - mount passed through to nfsm_rpchead2() and nfs_aux_request()
 * ctx     - supplies the thread and credential used for the request
 * sa      - in: server address; out: updated with the returned port
 *           (IPv4) or with the address parsed from the returned
 *           universal address (IPv6)
 * so      - socket to use, or NULL to let nfs_aux_request() create one
 * ipproto - IPPROTO_UDP or IPPROTO_TCP; also selects the socket type
 * timeo   - passed to nfs_aux_request()
 *
 * IPv4 uses PMAPPROG/PMAPVERS/PMAPPROC_GETPORT.  IPv6 uses RPCBPROG with
 * RPCBVERS4/RPCBPROC_GETVERSADDR first and retries once with
 * RPCBVERS3/RPCBPROC_GETADDR on EPROGMISMATCH, EPROCUNAVAIL, EIO or
 * EBADRPC.
 *
 * Returns 0 on success, EINVAL for an unsupported address family, or the
 * error from building, sending or parsing the request.
 */
5322 int
5323 nfs_portmap_lookup(
5324 struct nfsmount *nmp,
5325 vfs_context_t ctx,
5326 struct sockaddr *sa,
5327 socket_t so,
5328 uint32_t protocol,
5329 uint32_t vers,
5330 uint32_t ipproto,
5331 int timeo)
5332 {
5333 thread_t thd = vfs_context_thread(ctx);
5334 kauth_cred_t cred = vfs_context_ucred(ctx);
5335 struct sockaddr_storage ss;
5336 struct sockaddr *saddr = (struct sockaddr*)&ss;
5337 struct nfsm_chain nmreq, nmrep;
5338 mbuf_t mreq;
5339 int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0;
5340 uint32_t port;
5341 uint64_t xid = 0;
5342 char uaddr[MAX_IPv6_STR_LEN+16];
5343
/* work on a private copy of the address so its port can be overwritten */
5344 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5345 if (saddr->sa_family == AF_INET) {
5346 ip = 4;
5347 pmprog = PMAPPROG;
5348 pmvers = PMAPVERS;
5349 pmproc = PMAPPROC_GETPORT;
5350 } else if (saddr->sa_family == AF_INET6) {
5351 ip = 6;
5352 pmprog = RPCBPROG;
5353 pmvers = RPCBVERS4;
5354 pmproc = RPCBPROC_GETVERSADDR;
5355 } else {
5356 return (EINVAL);
5357 }
5358 nfsm_chain_null(&nmreq);
5359 nfsm_chain_null(&nmrep);
5360
5361 tryagain:
5362 /* send portmapper request to get port/uaddr */
5363 if (ip == 4)
5364 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT);
5365 else
5366 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT);
5367 nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED);
5368 nfsm_chain_add_32(error, &nmreq, protocol);
5369 nfsm_chain_add_32(error, &nmreq, vers);
5370 if (ip == 4) {
/* portmap request: transport protocol number, then a zero port */
5371 nfsm_chain_add_32(error, &nmreq, ipproto);
5372 nfsm_chain_add_32(error, &nmreq, 0);
5373 } else {
/* rpcbind request: the netid string, then empty uaddr and owner strings */
5374 if (ipproto == IPPROTO_TCP)
5375 nfsm_chain_add_string(error, &nmreq, "tcp6", 4);
5376 else
5377 nfsm_chain_add_string(error, &nmreq, "udp6", 4);
5378 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */
5379 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */
5380 }
5381 nfsm_chain_build_done(error, &nmreq);
5382 nfsmout_if(error);
5383 error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
5384 pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead,
5385 &xid, &mreq);
5386 nfsmout_if(error);
/*
 * Detach the chain head from nmreq so the cleanup calls below do not touch
 * it; nfs_aux_request() frees mreq.
 * NOTE(review): presumably mreq now owns those mbufs - confirm against
 * nfsm_rpchead2().
 */
5387 nmreq.nmc_mhead = NULL;
5388 error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
5389 mreq, R_XID32(xid), 0, timeo, &nmrep);
5390
5391 /* grab port from portmap response */
5392 if (ip == 4) {
5393 nfsm_chain_get_32(error, &nmrep, port);
5394 if (!error)
5395 ((struct sockaddr_in*)sa)->sin_port = htons(port);
5396 } else {
5397 /* get uaddr string and convert to sockaddr */
5398 nfsm_chain_get_32(error, &nmrep, ualen);
5399 if (!error) {
/* the address must fit in uaddr with room for the terminating NUL */
5400 if (ualen > ((int)sizeof(uaddr)-1))
5401 error = EIO;
5402 if (ualen < 1) {
5403 /* program is not available, just return a zero port */
5404 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5405 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0);
5406 } else {
5407 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr);
5408 if (!error) {
5409 uaddr[ualen] = '\0';
5410 if (!nfs_uaddr2sockaddr(uaddr, saddr))
5411 error = EIO;
5412 }
5413 }
5414 }
5415 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) {
5416 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
5417 if (pmvers == RPCBVERS4) {
5418 /* fall back to v3 and GETADDR */
5419 pmvers = RPCBVERS3;
5420 pmproc = RPCBPROC_GETADDR;
5421 nfsm_chain_cleanup(&nmreq);
5422 nfsm_chain_cleanup(&nmrep);
5423 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
5424 xid = 0;
5425 error = 0;
5426 goto tryagain;
5427 }
5428 }
/* success: copy the resolved address back to the caller */
5429 if (!error)
5430 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len));
5431 }
5432 nfsmout:
5433 nfsm_chain_cleanup(&nmreq);
5434 nfsm_chain_cleanup(&nmrep);
5435 return (error);
5436 }
5437
5438 int
5439 nfs_msg(thread_t thd,
5440 const char *server,
5441 const char *msg,
5442 int error)
5443 {
5444 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
5445 tpr_t tpr;
5446
5447 if (p)
5448 tpr = tprintf_open(p);
5449 else
5450 tpr = NULL;
5451 if (error)
5452 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
5453 else
5454 tprintf(tpr, "nfs server %s: %s\n", server, msg);
5455 tprintf_close(tpr);
5456 return (0);
5457 }
5458
/* Policy bits for nfs_squishy_flags. */
5459 #define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */
5460 #define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if they are automounted */
5461 #define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */
5462 #define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */
5463 #define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */
5464
/* Default policy: only automounted volumes on mobile machines, using the quick timeout. */
5465 uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK;
/* Non-zero when this machine is considered mobile; consulted by nfs_can_squish() and nfs_use_cache(). */
5466 int32_t nfs_is_mobile;
5467
/* Both timeouts are compared against differences of tv_sec values, i.e. they are in seconds. */
5468 #define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out for squishy mounts */
5469 #define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out when the nfs_squishy_flags NFS_SQUISH_QUICK bit is set */
5470
5471 /*
5472 * Could this mount be squished?
5473 */
5474 int
5475 nfs_can_squish(struct nfsmount *nmp)
5476 {
5477 uint64_t flags = vfs_flags(nmp->nm_mountp);
5478 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT));
5479
5480 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0)
5481 return (0);
5482
5483 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0)
5484 return (0);
5485
5486 return (1);
5487 }
5488
5489 /*
5490 * NFS mounts default to "rw,hard" - but frequently on mobile clients
5491 * the mount may become "not responding". It's desirable to be able
5492 * to unmount these dead mounts, but only if there is no risk of
5493 * losing data or crashing applications. A "squishy" NFS mount is one
5494 * that can be force unmounted with little risk of harm.
5495 *
5496 * nfs_is_squishy checks if a mount is in a squishy state. A mount is
5497 * in a squishy state iff it is allowed to be squishy and there are no
5498 * dirty pages and there are no mmapped files and there are no files
5499 * open for write. Whether a mount is allowed to be squishy is controlled
5500 * by the settings of nfs_squishy_flags and the machine's mobility state
5501 * (nfs_is_mobile). These flags can be set by sysctls.
5502 *
5503 * If nfs_is_squishy determines that we are in a squishy state we will
5504 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT
5505 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see
5506 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just
5507 * update the current dead timeout with the mount's nm_deadtimeout
5508 * value set at mount time.
5509 *
5510 * Assumes that nm_lock is held.
5511 *
5512 * Note this routine is racy, but its effect on the dead timeout
5513 * only matters when we're in trouble and are likely to stay that
5514 * way. Since by default it only applies to automounted volumes on
5515 * mobile machines, this is a reasonable trade-off between data
5516 * integrity and user experience. It can be disabled or set via the
5517 * nfs.conf file.
5518 */
5519
5520 int
5521 nfs_is_squishy(struct nfsmount *nmp)
5522 {
5523 mount_t mp = nmp->nm_mountp;
5524 int squishy = 0;
5525 int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;
5526
5527 NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
5528 vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile);
5529
5530 if (!nfs_can_squish(nmp))
5531 goto out;
5532
5533 timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo;
5534 NFS_SOCK_DBG("nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo);
5535
5536 if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
5537 uint64_t flags = mp ? vfs_flags(mp) : 0;
5538 squishy = 1;
5539
5540 /*
5541 * Walk the nfs nodes and check for dirty buffers it we're not
5542 * RDONLY and we've not already been declared as squishy since
5543 * this can be a bit expensive.
5544 */
5545 if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY))
5546 squishy = !nfs_mount_is_dirty(mp);
5547 }
5548
5549 out:
5550 if (squishy)
5551 nmp->nm_state |= NFSSTA_SQUISHY;
5552 else
5553 nmp->nm_state &= ~NFSSTA_SQUISHY;
5554
5555 nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;
5556
5557 NFS_SOCK_DBG("nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout);
5558
5559 return (squishy);
5560 }
5561
5562 /*
5563 * On a send operation, if we can't reach the server and we've got only one server to talk to
5564 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead
5565 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise.
5566 */
5567 int
5568 nfs_is_dead(int error, struct nfsmount *nmp)
5569 {
5570 fsid_t fsid;
5571
5572 lck_mtx_lock(&nmp->nm_lock);
5573 if (nmp->nm_state & NFSSTA_DEAD) {
5574 lck_mtx_unlock(&nmp->nm_lock);
5575 return (1);
5576 }
5577
5578 if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) ||
5579 !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) {
5580 lck_mtx_unlock(&nmp->nm_lock);
5581 return (0);
5582 }
5583
5584 if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
5585 printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
5586 fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
5587 lck_mtx_unlock(&nmp->nm_lock);
5588 nfs_mount_zombie(nmp, NFSSTA_DEAD);
5589 vfs_event_signal(&fsid, VQ_DEAD, 0);
5590 return (1);
5591 }
5592 lck_mtx_unlock(&nmp->nm_lock);
5593 return (0);
5594 }
5595
5596 /*
5597 * If we've experienced timeouts and we're not really a
5598 * classic hard mount, then just return cached data to
5599 * the caller instead of likely hanging on an RPC.
5600 */
5601 int
5602 nfs_use_cache(struct nfsmount *nmp)
5603 {
5604 /*
5605 *%%% We always let mobile users goto the cache,
5606 * perhaps we should not even require them to have
5607 * a timeout?
5608 */
5609 int cache_ok = (nfs_is_mobile || NMFLAG(nmp, SOFT) ||
5610 nfs_can_squish(nmp) || nmp->nm_deadtimeout);
5611
5612 int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5613
5614 /*
5615 * So if we have a timeout and we're not really a hard hard-mount,
5616 * return 1 to not get things out of the cache.
5617 */
5618
5619 return ((nmp->nm_state & timeoutmask) && cache_ok);
5620 }
5621
5622 /*
5623 * Log a message that nfs or lockd server is unresponsive. Check if we
5624 * can be squished and if we can, or that our dead timeout has
5625 * expired, and we're not holding state, set our mount as dead, remove
5626 * our mount state and ask to be unmounted. If we are holding state
5627 * we're being called from the nfs_request_timer and will soon detect
5628 * that we need to unmount.
5629 */
5630 void
5631 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg, int holding_state)
5632 {
5633 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5634 uint32_t do_vfs_signal = 0;
5635 struct timeval now;
5636
5637 if (nfs_mount_gone(nmp))
5638 return;
5639
5640 lck_mtx_lock(&nmp->nm_lock);
5641
5642 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5643 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5644 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5645 wasunresponsive = (nmp->nm_state & timeoutmask);
5646
5647 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5648 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5649
5650 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
5651 nmp->nm_state |= NFSSTA_TIMEO;
5652 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
5653 nmp->nm_state |= NFSSTA_LOCKTIMEO;
5654 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5655 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
5656
5657 unresponsive = (nmp->nm_state & timeoutmask);
5658
5659 nfs_is_squishy(nmp);
5660
5661 if (unresponsive && (nmp->nm_curdeadtimeout > 0)) {
5662 microuptime(&now);
5663 if (!wasunresponsive) {
5664 nmp->nm_deadto_start = now.tv_sec;
5665 nfs_mount_sock_thread_wake(nmp);
5666 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout && !holding_state) {
5667 if (!(nmp->nm_state & NFSSTA_DEAD))
5668 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
5669 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
5670 do_vfs_signal = VQ_DEAD;
5671 }
5672 }
5673 lck_mtx_unlock(&nmp->nm_lock);
5674
5675 if (do_vfs_signal == VQ_DEAD && !(nmp->nm_state & NFSSTA_DEAD))
5676 nfs_mount_zombie(nmp, NFSSTA_DEAD);
5677 else if (softnobrowse || wasunresponsive || !unresponsive)
5678 do_vfs_signal = 0;
5679 else
5680 do_vfs_signal = VQ_NOTRESP;
5681 if (do_vfs_signal)
5682 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);
5683
5684 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
5685 }
5686
5687 void
5688 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
5689 {
5690 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5691 int do_vfs_signal;
5692
5693 if (nfs_mount_gone(nmp))
5694 return;
5695
5696 if (msg)
5697 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
5698
5699 lck_mtx_lock(&nmp->nm_lock);
5700
5701 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5702 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5703 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5704 wasunresponsive = (nmp->nm_state & timeoutmask);
5705
5706 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5707 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5708
5709 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
5710 nmp->nm_state &= ~NFSSTA_TIMEO;
5711 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
5712 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5713 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5714 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
5715
5716 unresponsive = (nmp->nm_state & timeoutmask);
5717
5718 nmp->nm_deadto_start = 0;
5719 nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
5720 nmp->nm_state &= ~NFSSTA_SQUISHY;
5721 lck_mtx_unlock(&nmp->nm_lock);
5722
5723 if (softnobrowse)
5724 do_vfs_signal = 0;
5725 else
5726 do_vfs_signal = (wasunresponsive && !unresponsive);
5727 if (do_vfs_signal)
5728 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
5729 }
5730
5731
5732 #endif /* NFSCLIENT */
5733
5734 #if NFSSERVER
5735
5736 /*
5737 * Generate the rpc reply header
5738 * siz arg. is used to decide if adding a cluster is worthwhile
 *
 * nd     - request descriptor; supplies the reply status (nd_repstat),
 *          the xid to echo (nd_retxid), the NFS version and any GSS context
 * slp    - unused
 * nmrepp - on success, receives the chain holding the reply header
 * siz    - expected size of the reply body; forced to 0 for NFSv2 replies
 *          that carry an error status
 *
 * Returns 0 on success.  On failure returns the mbuf allocation or chain
 * building error, frees any reply mbuf and leaves *nmrepp untouched.
 * When the reply status is non-zero and not NFSERR_RETVOID, the
 * nfsstats.srvrpc_errs counter is incremented.
5739 */
5740 int
5741 nfsrv_rephead(
5742 struct nfsrv_descript *nd,
5743 __unused struct nfsrv_sock *slp,
5744 struct nfsm_chain *nmrepp,
5745 size_t siz)
5746 {
5747 mbuf_t mrep;
5748 u_int32_t *tl;
5749 struct nfsm_chain nmrep;
5750 int err, error;
5751
/* "err" is the status being reported to the client; "error" tracks local failures building the reply */
5752 err = nd->nd_repstat;
5753 if (err && (nd->nd_vers == NFS_VER2))
5754 siz = 0;
5755
5756 /*
5757 * If this is a big reply, use a cluster else
5758 * try and leave leading space for the lower level headers.
5759 */
5760 siz += RPC_REPLYSIZ;
5761 if (siz >= nfs_mbuf_minclsize) {
5762 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
5763 } else {
5764 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
5765 }
5766 if (error) {
5767 /* unable to allocate packet */
5768 /* XXX should we keep statistics for these errors? */
5769 return (error);
5770 }
5771 if (siz < nfs_mbuf_minclsize) {
5772 /* leave space for lower level headers */
/* advances the data pointer by 80 bytes (80/sizeof(*tl) words) */
5773 tl = mbuf_data(mrep);
5774 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
5775 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
5776 }
5777 nfsm_chain_init(&nmrep, mrep);
5778 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
5779 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
5780 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
/* denied reply: either an auth error with its reason, or an RPC version mismatch with the low/high supported versions */
5781 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
5782 if (err & NFSERR_AUTHERR) {
5783 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
5784 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
5785 } else {
5786 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
5787 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
5788 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
5789 }
5790 } else {
5791 /* reply status */
5792 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
5793 if (nd->nd_gss_context != NULL) {
5794 /* RPCSEC_GSS verifier */
5795 error = nfs_gss_svc_verf_put(nd, &nmrep);
5796 if (error) {
/*
 * NOTE(review): "error" is already set here, and the nfsm_chain_* macros
 * appear to do nothing once it is, so this add likely has no effect; the
 * function then fails at "done" - confirm.
 */
5797 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
5798 goto done;
5799 }
5800 } else {
5801 /* RPCAUTH_NULL verifier */
5802 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
5803 nfsm_chain_add_32(error, &nmrep, 0);
5804 }
5805 /* accepted status */
5806 switch (err) {
5807 case EPROGUNAVAIL:
5808 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
5809 break;
5810 case EPROGMISMATCH:
5811 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
5812 /* XXX hard coded versions? */
5813 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
5814 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
5815 break;
5816 case EPROCUNAVAIL:
5817 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
5818 break;
5819 case EBADRPC:
5820 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
5821 break;
5822 default:
/* RPC-level success; the NFS status word follows unless the procedure returns void */
5823 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
5824 if (nd->nd_gss_context != NULL)
5825 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
5826 if (err != NFSERR_RETVOID)
5827 nfsm_chain_add_32(error, &nmrep,
5828 (err ? nfsrv_errmap(nd, err) : 0));
5829 break;
5830 }
5831 }
5832
5833 done:
5834 nfsm_chain_build_done(error, &nmrep);
5835 if (error) {
5836 /* error composing reply header */
5837 /* XXX should we keep statistics for these errors? */
5838 mbuf_freem(mrep);
5839 return (error);
5840 }
5841
/* hand the finished header chain to the caller */
5842 *nmrepp = nmrep;
5843 if ((err != 0) && (err != NFSERR_RETVOID))
5844 OSAddAtomic64(1, &nfsstats.srvrpc_errs);
5845 return (0);
5846 }
5847
5848 /*
5849 * The nfs server send routine.
5850 *
5851 * - return EINTR or ERESTART if interrupted by a signal
5852 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
5853 * - do any cleanup required by recoverable socket errors (???)
5854 */
5855 int
5856 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
5857 {
5858 int error;
5859 socket_t so = slp->ns_so;
5860 struct sockaddr *sendnam;
5861 struct msghdr msg;
5862
5863 bzero(&msg, sizeof(msg));
5864 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
5865 if ((sendnam = mbuf_data(nam))) {
5866 msg.msg_name = (caddr_t)sendnam;
5867 msg.msg_namelen = sendnam->sa_len;
5868 }
5869 }
5870 error = sock_sendmbuf(so, &msg, top, 0, NULL);
5871 if (!error)
5872 return (0);
5873 log(LOG_INFO, "nfsd send error %d\n", error);
5874
5875 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
5876 error = EPIPE; /* zap TCP sockets if they time out on send */
5877
5878 /* Handle any recoverable (soft) socket errors here. (???) */
5879 if (error != EINTR && error != ERESTART && error != EIO &&
5880 error != EWOULDBLOCK && error != EPIPE)
5881 error = 0;
5882
5883 return (error);
5884 }
5885
5886 /*
5887 * Socket upcall routine for the nfsd sockets.
5888 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
5889 * Essentially do as much as possible non-blocking, else punt and it will
5890 * be called with MBUF_WAITOK from an nfsd.
5891 */
5892 void
5893 nfsrv_rcv(socket_t so, void *arg, int waitflag)
5894 {
5895 struct nfsrv_sock *slp = arg;
5896
5897 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
5898 return;
5899
5900 lck_rw_lock_exclusive(&slp->ns_rwlock);
5901 nfsrv_rcv_locked(so, slp, waitflag);
5902 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
5903 }
5904 void
5905 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
5906 {
5907 mbuf_t m, mp, mhck, m2;
5908 int ns_flag=0, error;
5909 struct msghdr msg;
5910 size_t bytes_read;
5911
5912 if ((slp->ns_flag & SLP_VALID) == 0) {
5913 if (waitflag == MBUF_DONTWAIT)
5914 lck_rw_done(&slp->ns_rwlock);
5915 return;
5916 }
5917
5918 #ifdef notdef
5919 /*
5920 * Define this to test for nfsds handling this under heavy load.
5921 */
5922 if (waitflag == MBUF_DONTWAIT) {
5923 ns_flag = SLP_NEEDQ;
5924 goto dorecs;
5925 }
5926 #endif
5927 if (slp->ns_sotype == SOCK_STREAM) {
5928 /*
5929 * If there are already records on the queue, defer soreceive()
5930 * to an(other) nfsd so that there is feedback to the TCP layer that
5931 * the nfs servers are heavily loaded.
5932 */
5933 if (slp->ns_rec) {
5934 ns_flag = SLP_NEEDQ;
5935 goto dorecs;
5936 }
5937
5938 /*
5939 * Do soreceive().
5940 */
5941 bytes_read = 1000000000;
5942 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
5943 if (error || mp == NULL) {
5944 if (error == EWOULDBLOCK)
5945 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
5946 else
5947 ns_flag = SLP_DISCONN;
5948 goto dorecs;
5949 }
5950 m = mp;
5951 if (slp->ns_rawend) {
5952 if ((error = mbuf_setnext(slp->ns_rawend, m)))
5953 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
5954 slp->ns_cc += bytes_read;
5955 } else {
5956 slp->ns_raw = m;
5957 slp->ns_cc = bytes_read;
5958 }
5959 while ((m2 = mbuf_next(m)))
5960 m = m2;
5961 slp->ns_rawend = m;
5962
5963 /*
5964 * Now try and parse record(s) out of the raw stream data.
5965 */
5966 error = nfsrv_getstream(slp, waitflag);
5967 if (error) {
5968 if (error == EPERM)
5969 ns_flag = SLP_DISCONN;
5970 else
5971 ns_flag = SLP_NEEDQ;
5972 }
5973 } else {
5974 struct sockaddr_storage nam;
5975
5976 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
5977 /* already have max # RPC records queued on this socket */
5978 ns_flag = SLP_NEEDQ;
5979 goto dorecs;
5980 }
5981
5982 bzero(&msg, sizeof(msg));
5983 msg.msg_name = (caddr_t)&nam;
5984 msg.msg_namelen = sizeof(nam);
5985
5986 do {
5987 bytes_read = 1000000000;
5988 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
5989 if (mp) {
5990 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
5991 mbuf_setlen(mhck, nam.ss_len);
5992 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
5993 m = mhck;
5994 if (mbuf_setnext(m, mp)) {
5995 /* trouble... just drop it */
5996 printf("nfsrv_rcv: mbuf_setnext failed\n");
5997 mbuf_free(mhck);
5998 m = mp;
5999 }
6000 } else {
6001 m = mp;
6002 }
6003 if (slp->ns_recend)
6004 mbuf_setnextpkt(slp->ns_recend, m);
6005 else {
6006 slp->ns_rec = m;
6007 slp->ns_flag |= SLP_DOREC;
6008 }
6009 slp->ns_recend = m;
6010 mbuf_setnextpkt(m, NULL);
6011 slp->ns_reccnt++;
6012 }
6013 } while (mp);
6014 }
6015
6016 /*
6017 * Now try and process the request records, non-blocking.
6018 */
6019 dorecs:
6020 if (ns_flag)
6021 slp->ns_flag |= ns_flag;
6022 if (waitflag == MBUF_DONTWAIT) {
6023 int wake = (slp->ns_flag & SLP_WORKTODO);
6024 lck_rw_done(&slp->ns_rwlock);
6025 if (wake && nfsd_thread_count) {
6026 lck_mtx_lock(nfsd_mutex);
6027 nfsrv_wakenfsd(slp);
6028 lck_mtx_unlock(nfsd_mutex);
6029 }
6030 }
6031 }
6032
6033 /*
6034 * Try and extract an RPC request from the mbuf data list received on a
6035 * stream socket. The "waitflag" argument indicates whether or not it
6036 * can sleep.
6037 */
6038 int
6039 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
6040 {
6041 mbuf_t m;
6042 char *cp1, *cp2, *mdata;
6043 int len, mlen, error;
6044 mbuf_t om, m2, recm;
6045 u_int32_t recmark;
6046
6047 if (slp->ns_flag & SLP_GETSTREAM)
6048 panic("nfs getstream");
6049 slp->ns_flag |= SLP_GETSTREAM;
6050 for (;;) {
6051 if (slp->ns_reclen == 0) {
6052 if (slp->ns_cc < NFSX_UNSIGNED) {
6053 slp->ns_flag &= ~SLP_GETSTREAM;
6054 return (0);
6055 }
6056 m = slp->ns_raw;
6057 mdata = mbuf_data(m);
6058 mlen = mbuf_len(m);
6059 if (mlen >= NFSX_UNSIGNED) {
6060 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
6061 mdata += NFSX_UNSIGNED;
6062 mlen -= NFSX_UNSIGNED;
6063 mbuf_setdata(m, mdata, mlen);
6064 } else {
6065 cp1 = (caddr_t)&recmark;
6066 cp2 = mdata;
6067 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
6068 while (mlen == 0) {
6069 m = mbuf_next(m);
6070 cp2 = mbuf_data(m);
6071 mlen = mbuf_len(m);
6072 }
6073 *cp1++ = *cp2++;
6074 mlen--;
6075 mbuf_setdata(m, cp2, mlen);
6076 }
6077 }
6078 slp->ns_cc -= NFSX_UNSIGNED;
6079 recmark = ntohl(recmark);
6080 slp->ns_reclen = recmark & ~0x80000000;
6081 if (recmark & 0x80000000)
6082 slp->ns_flag |= SLP_LASTFRAG;
6083 else
6084 slp->ns_flag &= ~SLP_LASTFRAG;
6085 if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
6086 slp->ns_flag &= ~SLP_GETSTREAM;
6087 return (EPERM);
6088 }
6089 }
6090
6091 /*
6092 * Now get the record part.
6093 *
6094 * Note that slp->ns_reclen may be 0. Linux sometimes
6095 * generates 0-length RPCs
6096 */
6097 recm = NULL;
6098 if (slp->ns_cc == slp->ns_reclen) {
6099 recm = slp->ns_raw;
6100 slp->ns_raw = slp->ns_rawend = NULL;
6101 slp->ns_cc = slp->ns_reclen = 0;
6102 } else if (slp->ns_cc > slp->ns_reclen) {
6103 len = 0;
6104 m = slp->ns_raw;
6105 mlen = mbuf_len(m);
6106 mdata = mbuf_data(m);
6107 om = NULL;
6108 while (len < slp->ns_reclen) {
6109 if ((len + mlen) > slp->ns_reclen) {
6110 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
6111 slp->ns_flag &= ~SLP_GETSTREAM;
6112 return (EWOULDBLOCK);
6113 }
6114 if (om) {
6115 if (mbuf_setnext(om, m2)) {
6116 /* trouble... just drop it */
6117 printf("nfsrv_getstream: mbuf_setnext failed\n");
6118 mbuf_freem(m2);
6119 slp->ns_flag &= ~SLP_GETSTREAM;
6120 return (EWOULDBLOCK);
6121 }
6122 recm = slp->ns_raw;
6123 } else {
6124 recm = m2;
6125 }
6126 mdata += slp->ns_reclen - len;
6127 mlen -= slp->ns_reclen - len;
6128 mbuf_setdata(m, mdata, mlen);
6129 len = slp->ns_reclen;
6130 } else if ((len + mlen) == slp->ns_reclen) {
6131 om = m;
6132 len += mlen;
6133 m = mbuf_next(m);
6134 recm = slp->ns_raw;
6135 if (mbuf_setnext(om, NULL)) {
6136 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
6137 slp->ns_flag &= ~SLP_GETSTREAM;
6138 return (EWOULDBLOCK);
6139 }
6140 mlen = mbuf_len(m);
6141 mdata = mbuf_data(m);
6142 } else {
6143 om = m;
6144 len += mlen;
6145 m = mbuf_next(m);
6146 mlen = mbuf_len(m);
6147 mdata = mbuf_data(m);
6148 }
6149 }
6150 slp->ns_raw = m;
6151 slp->ns_cc -= len;
6152 slp->ns_reclen = 0;
6153 } else {
6154 slp->ns_flag &= ~SLP_GETSTREAM;
6155 return (0);
6156 }
6157
6158 /*
6159 * Accumulate the fragments into a record.
6160 */
6161 if (slp->ns_frag == NULL) {
6162 slp->ns_frag = recm;
6163 } else {
6164 m = slp->ns_frag;
6165 while ((m2 = mbuf_next(m)))
6166 m = m2;
6167 if ((error = mbuf_setnext(m, recm)))
6168 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
6169 }
6170 if (slp->ns_flag & SLP_LASTFRAG) {
6171 if (slp->ns_recend)
6172 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
6173 else {
6174 slp->ns_rec = slp->ns_frag;
6175 slp->ns_flag |= SLP_DOREC;
6176 }
6177 slp->ns_recend = slp->ns_frag;
6178 slp->ns_frag = NULL;
6179 }
6180 }
6181 }
6182
6183 /*
6184 * Parse an RPC header.
6185 */
6186 int
6187 nfsrv_dorec(
6188 struct nfsrv_sock *slp,
6189 struct nfsd *nfsd,
6190 struct nfsrv_descript **ndp)
6191 {
6192 mbuf_t m;
6193 mbuf_t nam;
6194 struct nfsrv_descript *nd;
6195 int error = 0;
6196
6197 *ndp = NULL;
6198 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
6199 return (ENOBUFS);
6200 MALLOC_ZONE(nd, struct nfsrv_descript *,
6201 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
6202 if (!nd)
6203 return (ENOMEM);
6204 m = slp->ns_rec;
6205 slp->ns_rec = mbuf_nextpkt(m);
6206 if (slp->ns_rec)
6207 mbuf_setnextpkt(m, NULL);
6208 else {
6209 slp->ns_flag &= ~SLP_DOREC;
6210 slp->ns_recend = NULL;
6211 }
6212 slp->ns_reccnt--;
6213 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
6214 nam = m;
6215 m = mbuf_next(m);
6216 if ((error = mbuf_setnext(nam, NULL)))
6217 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
6218 } else
6219 nam = NULL;
6220 nd->nd_nam2 = nam;
6221 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
6222 if (!error)
6223 error = nfsrv_getreq(nd);
6224 if (error) {
6225 if (nam)
6226 mbuf_freem(nam);
6227 if (nd->nd_gss_context)
6228 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
6229 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
6230 return (error);
6231 }
6232 nd->nd_mrep = NULL;
6233 *ndp = nd;
6234 nfsd->nfsd_nd = nd;
6235 return (0);
6236 }
6237
6238 /*
6239 * Parse an RPC request
6240 * - verify it
6241 * - fill in the cred struct.
6242 */
6243 int
6244 nfsrv_getreq(struct nfsrv_descript *nd)
6245 {
6246 struct nfsm_chain *nmreq;
6247 int len, i;
6248 u_int32_t nfsvers, auth_type;
6249 int error = 0;
6250 uid_t user_id;
6251 gid_t group_id;
6252 int ngroups;
6253 uint32_t val;
6254
6255 nd->nd_cr = NULL;
6256 nd->nd_gss_context = NULL;
6257 nd->nd_gss_seqnum = 0;
6258 nd->nd_gss_mb = NULL;
6259
6260 user_id = group_id = -2;
6261 val = auth_type = len = 0;
6262
6263 nmreq = &nd->nd_nmreq;
6264 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
6265 nfsm_chain_get_32(error, nmreq, val); // RPC Call
6266 if (!error && (val != RPC_CALL))
6267 error = EBADRPC;
6268 nfsmout_if(error);
6269 nd->nd_repstat = 0;
6270 nfsm_chain_get_32(error, nmreq, val); // RPC Version
6271 nfsmout_if(error);
6272 if (val != RPC_VER2) {
6273 nd->nd_repstat = ERPCMISMATCH;
6274 nd->nd_procnum = NFSPROC_NOOP;
6275 return (0);
6276 }
6277 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
6278 nfsmout_if(error);
6279 if (val != NFS_PROG) {
6280 nd->nd_repstat = EPROGUNAVAIL;
6281 nd->nd_procnum = NFSPROC_NOOP;
6282 return (0);
6283 }
6284 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
6285 nfsmout_if(error);
6286 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
6287 nd->nd_repstat = EPROGMISMATCH;
6288 nd->nd_procnum = NFSPROC_NOOP;
6289 return (0);
6290 }
6291 nd->nd_vers = nfsvers;
6292 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
6293 nfsmout_if(error);
6294 if ((nd->nd_procnum >= NFS_NPROCS) ||
6295 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
6296 nd->nd_repstat = EPROCUNAVAIL;
6297 nd->nd_procnum = NFSPROC_NOOP;
6298 return (0);
6299 }
6300 if (nfsvers != NFS_VER3)
6301 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
6302 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
6303 nfsm_chain_get_32(error, nmreq, len); // Auth Length
6304 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
6305 error = EBADRPC;
6306 nfsmout_if(error);
6307
6308 /* Handle authentication */
6309 if (auth_type == RPCAUTH_SYS) {
6310 struct posix_cred temp_pcred;
6311 if (nd->nd_procnum == NFSPROC_NULL)
6312 return (0);
6313 nd->nd_sec = RPCAUTH_SYS;
6314 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
6315 nfsm_chain_get_32(error, nmreq, len); // hostname length
6316 if (len < 0 || len > NFS_MAXNAMLEN)
6317 error = EBADRPC;
6318 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
6319 nfsmout_if(error);
6320
6321 /* create a temporary credential using the bits from the wire */
6322 bzero(&temp_pcred, sizeof(temp_pcred));
6323 nfsm_chain_get_32(error, nmreq, user_id);
6324 nfsm_chain_get_32(error, nmreq, group_id);
6325 temp_pcred.cr_groups[0] = group_id;
6326 nfsm_chain_get_32(error, nmreq, len); // extra GID count
6327 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
6328 error = EBADRPC;
6329 nfsmout_if(error);
6330 for (i = 1; i <= len; i++)
6331 if (i < NGROUPS)
6332 nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
6333 else
6334 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
6335 nfsmout_if(error);
6336 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
6337 if (ngroups > 1)
6338 nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
6339 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
6340 nfsm_chain_get_32(error, nmreq, len); // verifier length
6341 if (len < 0 || len > RPCAUTH_MAXSIZ)
6342 error = EBADRPC;
6343 if (len > 0)
6344 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
6345
6346 /* request creation of a real credential */
6347 temp_pcred.cr_uid = user_id;
6348 temp_pcred.cr_ngroups = ngroups;
6349 nd->nd_cr = posix_cred_create(&temp_pcred);
6350 if (nd->nd_cr == NULL) {
6351 nd->nd_repstat = ENOMEM;
6352 nd->nd_procnum = NFSPROC_NOOP;
6353 return (0);
6354 }
6355 } else if (auth_type == RPCSEC_GSS) {
6356 error = nfs_gss_svc_cred_get(nd, nmreq);
6357 if (error) {
6358 if (error == EINVAL)
6359 goto nfsmout; // drop the request
6360 nd->nd_repstat = error;
6361 nd->nd_procnum = NFSPROC_NOOP;
6362 return (0);
6363 }
6364 } else {
6365 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
6366 return (0);
6367 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
6368 nd->nd_procnum = NFSPROC_NOOP;
6369 return (0);
6370 }
6371 return (0);
6372 nfsmout:
6373 if (IS_VALID_CRED(nd->nd_cr))
6374 kauth_cred_unref(&nd->nd_cr);
6375 nfsm_chain_cleanup(nmreq);
6376 return (error);
6377 }
6378
6379 /*
6380 * Search for a sleeping nfsd and wake it up.
6381 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
6382 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
6383 * Note: Must be called with nfsd_mutex held.
6384 */
6385 void
6386 nfsrv_wakenfsd(struct nfsrv_sock *slp)
6387 {
6388 struct nfsd *nd;
6389
6390 if ((slp->ns_flag & SLP_VALID) == 0)
6391 return;
6392
6393 lck_rw_lock_exclusive(&slp->ns_rwlock);
6394 /* if there's work to do on this socket, make sure it's queued up */
6395 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
6396 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
6397 slp->ns_flag |= SLP_WAITQ;
6398 }
6399 lck_rw_done(&slp->ns_rwlock);
6400
6401 /* wake up a waiting nfsd, if possible */
6402 nd = TAILQ_FIRST(&nfsd_queue);
6403 if (!nd)
6404 return;
6405
6406 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
6407 nd->nfsd_flag &= ~NFSD_WAITING;
6408 wakeup(nd);
6409 }
6410
6411 #endif /* NFSSERVER */