]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_socket.c
71b6e5c447a228f104cfaab97cf3950e7cf6a5ce
[apple/xnu.git] / bsd / nfs / nfs_socket.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1991, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66 */
67
68 /*
69 * Socket operations for use by nfs
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75 #include <sys/signalvar.h>
76 #include <sys/kauth.h>
77 #include <sys/mount_internal.h>
78 #include <sys/kernel.h>
79 #include <sys/kpi_mbuf.h>
80 #include <sys/malloc.h>
81 #include <sys/vnode.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/syslog.h>
86 #include <sys/tprintf.h>
87 #include <libkern/OSAtomic.h>
88
89 #include <sys/time.h>
90 #include <kern/clock.h>
91 #include <kern/task.h>
92 #include <kern/thread.h>
93 #include <kern/thread_call.h>
94 #include <sys/user.h>
95 #include <sys/acct.h>
96
97 #include <netinet/in.h>
98 #include <netinet/tcp.h>
99
100 #include <nfs/rpcv2.h>
101 #include <nfs/krpc.h>
102 #include <nfs/nfsproto.h>
103 #include <nfs/nfs.h>
104 #include <nfs/xdr_subs.h>
105 #include <nfs/nfsm_subs.h>
106 #include <nfs/nfs_gss.h>
107 #include <nfs/nfsmount.h>
108 #include <nfs/nfsnode.h>
109
110 /* XXX */
111 boolean_t current_thread_aborted(void);
112 kern_return_t thread_terminate(thread_t);
113
114
115 #if NFSSERVER
116 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
117
118 int nfsrv_getstream(struct nfsrv_sock *,int);
119 int nfsrv_getreq(struct nfsrv_descript *);
120 extern int nfsv3_procid[NFS_NPROCS];
121 #endif /* NFSSERVER */
122
123 /*
124 * compare two sockaddr structures
125 */
126 int
127 nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
128 {
129 if (!sa1)
130 return (-1);
131 if (!sa2)
132 return (1);
133 if (sa1->sa_family != sa2->sa_family)
134 return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
135 if (sa1->sa_len != sa2->sa_len)
136 return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
137 if (sa1->sa_family == AF_INET)
138 return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
139 &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
140 if (sa1->sa_family == AF_INET6)
141 return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
142 &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
143 return (-1);
144 }
145
146 #if NFSCLIENT
147
148 int nfs_reconnect(struct nfsmount *);
149 int nfs_connect_setup(struct nfsmount *);
150 void nfs_mount_sock_thread(void *, wait_result_t);
151 void nfs_udp_rcv(socket_t, void*, int);
152 void nfs_tcp_rcv(socket_t, void*, int);
153 void nfs_sock_poke(struct nfsmount *);
154 void nfs_request_match_reply(struct nfsmount *, mbuf_t);
155 void nfs_reqdequeue(struct nfsreq *);
156 void nfs_reqbusy(struct nfsreq *);
157 struct nfsreq *nfs_reqnext(struct nfsreq *);
158 int nfs_wait_reply(struct nfsreq *);
159 void nfs_softterm(struct nfsreq *);
160
161 #ifdef NFS_SOCKET_DEBUGGING
162 #define NFS_SOCK_DBG(X) printf X
163 #else
164 #define NFS_SOCK_DBG(X)
165 #endif
166
167 /*
168 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
169 * Use the mean and mean deviation of rtt for the appropriate type of rpc
170 * for the frequent rpcs and a default for the others.
171 * The justification for doing "other" this way is that these rpcs
172 * happen so infrequently that timer est. would probably be stale.
173 * Also, since many of these rpcs are
174 * non-idempotent, a conservative timeout is desired.
175 * getattr, lookup - A+2D
176 * read, write - A+4D
177 * other - nm_timeo
178 */
179 #define NFS_RTO(n, t) \
180 ((t) == 0 ? (n)->nm_timeo : \
181 ((t) < 3 ? \
182 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
183 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
/* NFS_SRTT/NFS_SDRTT select the smoothed-rtt / rtt-deviation slot for a request, by its procedure's timer type */
184 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
185 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
186
187 /*
188 * Defines which timer to use for the procnum.
189 * 0 - default
190 * 1 - getattr
191 * 2 - lookup
192 * 3 - read
193 * 4 - write
194 */
/* indexed by NFS procedure number; e.g. proct[1] (GETATTR) = 1, proct[6]/proct[7] (READ/WRITE) = 3/4 */
195 static int proct[NFS_NPROCS] = {
196 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
197 };
198
199 /*
200 * There is a congestion window for outstanding rpcs maintained per mount
201 * point. The cwnd size is adjusted in roughly the way that:
202 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
203 * SIGCOMM '88". ACM, August 1988.
204 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
205 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
206 * of rpcs is in progress.
207 * (The sent count and cwnd are scaled for integer arith.)
208 * Variants of "slow start" were tried and were found to be too much of a
209 * performance hit (ave. rtt 3 times larger),
210 * I suspect due to the large rtt that nfs rpcs have.
211 */
212 #define NFS_CWNDSCALE 256
213 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
/* retransmit backoff multipliers — presumably indexed by retransmit count; the consumer is not visible in this chunk (TODO confirm) */
214 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
215
216 /*
217 * Increment location index to next address/server/location.
218 */
219 void
220 nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
221 {
222 uint8_t loc = nlip->nli_loc;
223 uint8_t serv = nlip->nli_serv;
224 uint8_t addr = nlip->nli_addr;
225
226 /* move to next address */
227 addr++;
228 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
229 /* no more addresses on current server, go to first address of next server */
230 next_server:
231 addr = 0;
232 serv++;
233 if (serv >= nlp->nl_locations[loc]->nl_servcount) {
234 /* no more servers on current location, go to first server of next location */
235 serv = 0;
236 loc++;
237 if (loc >= nlp->nl_numlocs)
238 loc = 0; /* after last location, wrap back around to first location */
239 }
240 }
241 /*
242 * It's possible for this next server to not have any addresses.
243 * Check for that here and go to the next server.
244 * But bail out if we've managed to come back around to the original
245 * location that was passed in. (That would mean no servers had any
246 * addresses. And we don't want to spin here forever.)
247 */
248 if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
249 return;
250 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
251 goto next_server;
252
253 nlip->nli_loc = loc;
254 nlip->nli_serv = serv;
255 nlip->nli_addr = addr;
256 }
257
258 /*
259 * Compare two location indices.
260 */
261 int
262 nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
263 {
264 if (nlip1->nli_loc != nlip2->nli_loc)
265 return (nlip1->nli_loc - nlip2->nli_loc);
266 if (nlip1->nli_serv != nlip2->nli_serv)
267 return (nlip1->nli_serv - nlip2->nli_serv);
268 return (nlip1->nli_addr - nlip2->nli_addr);
269 }
270
271 /*
272 * Get the mntfromname (or path portion only) for a given location.
273 */
274 void
275 nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
276 {
277 struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
278 char *p;
279 int cnt, i;
280
281 p = s;
282 if (!pathonly) {
283 cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name);
284 p += cnt;
285 size -= cnt;
286 }
287 if (fsl->nl_path.np_compcount == 0) {
288 /* mounting root export on server */
289 if (size > 0) {
290 *p++ = '/';
291 *p++ = '\0';
292 }
293 return;
294 }
295 /* append each server path component */
296 for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
297 cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
298 p += cnt;
299 size -= cnt;
300 }
301 }
302
303 /*
304 * NFS client connect socket upcall.
305 * (Used only during socket connect/search.)
306 */
/*
 * Upcall for a socket that is still being connected/verified by the
 * connect search.  Drains available data, parses the expected NULL RPC
 * "ping" reply, and marks the socket NSO_VERIFIED on success or NSO_DEAD
 * on any error, waking the search thread either way.  For version
 * negotiation, an RPC_PROGMISMATCH reply is mined for the server's
 * supported version range.
 */
307 void
308 nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
309 {
310 struct nfs_socket *nso = arg;
311 size_t rcvlen;
312 mbuf_t m;
313 int error = 0, recv = 1;
314
/* NOTE(review): nso_flags is read here before nso_lock is taken — presumably a benign race while the connect is still in progress; confirm. */
315 if (nso->nso_flags & NSO_CONNECTING) {
316 NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso));
317 wakeup(nso->nso_wake);
318 return;
319 }
320
321 lck_mtx_lock(&nso->nso_lock);
/* ignore the upcall if another upcall is active, we're tearing down, or we aren't expecting a ping reply */
322 if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
323 NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso));
324 lck_mtx_unlock(&nso->nso_lock);
325 return;
326 }
327 NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso));
328 nso->nso_flags |= NSO_UPCALL;
329
330 /* loop while we make error-free progress */
331 while (!error && recv) {
332 /* make sure we're still interested in this socket */
333 if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
334 break;
/* the lock is dropped around the (non-blocking) receive */
335 lck_mtx_unlock(&nso->nso_lock);
336 m = NULL;
337 if (nso->nso_sotype == SOCK_STREAM) {
338 error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
339 } else {
340 rcvlen = 1000000;
341 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
342 recv = m ? 1 : 0;
343 }
344 lck_mtx_lock(&nso->nso_lock);
345 if (m) {
346 /* match response with request */
347 struct nfsm_chain nmrep;
348 uint32_t reply = 0, rxid = 0, verf_type, verf_len;
349 uint32_t reply_status, rejected_status, accepted_status;
350
/* the nfsm_chain_* macros are no-ops once 'error' is set; nfsmout_if() jumps to nfsmout on error */
351 nfsm_chain_dissect_init(error, &nmrep, m);
352 nfsm_chain_get_32(error, &nmrep, rxid);
353 nfsm_chain_get_32(error, &nmrep, reply);
354 if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
355 error = EBADRPC;
356 nfsm_chain_get_32(error, &nmrep, reply_status);
357 if (!error && (reply_status == RPC_MSGDENIED)) {
358 nfsm_chain_get_32(error, &nmrep, rejected_status);
359 if (!error)
360 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
361 }
362 nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
363 nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
364 nfsmout_if(error);
365 if (verf_len)
366 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
367 nfsm_chain_get_32(error, &nmrep, accepted_status);
368 nfsmout_if(error);
/* no version was pinned yet: use the server's advertised min/max range to pick one */
369 if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
370 uint32_t minvers, maxvers;
371 nfsm_chain_get_32(error, &nmrep, minvers);
372 nfsm_chain_get_32(error, &nmrep, maxvers);
373 nfsmout_if(error);
374 if (nso->nso_protocol == PMAPPROG) {
375 if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
376 error = EPROGMISMATCH;
377 else if ((nso->nso_saddr->sa_family == AF_INET) &&
378 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
379 nso->nso_version = PMAPVERS;
380 else if (nso->nso_saddr->sa_family == AF_INET6) {
381 if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
382 nso->nso_version = RPCBVERS4;
383 else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
384 nso->nso_version = RPCBVERS3;
385 }
386 } else if (nso->nso_protocol == NFS_PROG) {
/* prefer NFSv3, then v2, then v4 */
387 if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
388 error = EPROGMISMATCH;
389 else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
390 nso->nso_version = NFS_VER3;
391 else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
392 nso->nso_version = NFS_VER2;
393 else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
394 nso->nso_version = NFS_VER4;
395 }
/* if we settled on a version, treat the mismatch reply as success */
396 if (!error && nso->nso_version)
397 accepted_status = RPC_SUCCESS;
398 }
399 if (!error) {
400 switch (accepted_status) {
401 case RPC_SUCCESS:
402 error = 0;
403 break;
404 case RPC_PROGUNAVAIL:
405 error = EPROGUNAVAIL;
406 break;
407 case RPC_PROGMISMATCH:
408 error = EPROGMISMATCH;
409 break;
410 case RPC_PROCUNAVAIL:
411 error = EPROCUNAVAIL;
412 break;
413 case RPC_GARBAGE:
414 error = EBADRPC;
415 break;
416 case RPC_SYSTEM_ERR:
417 default:
418 error = EIO;
419 break;
420 }
421 }
/* ping is answered (or unparseable): record the verdict and notify the search thread */
422 nfsmout:
423 nso->nso_flags &= ~NSO_PINGING;
424 if (error) {
425 nso->nso_error = error;
426 nso->nso_flags |= NSO_DEAD;
427 } else {
428 nso->nso_flags |= NSO_VERIFIED;
429 }
430 mbuf_freem(m);
431 /* wake up search thread */
432 wakeup(nso->nso_wake);
433 break;
434 }
435 }
436
437 nso->nso_flags &= ~NSO_UPCALL;
/* EWOULDBLOCK just means no more data right now; any other error — or no progress — kills the socket */
438 if ((error != EWOULDBLOCK) && (error || !recv)) {
439 /* problems with the socket... */
440 nso->nso_error = error ? error : EPIPE;
441 nso->nso_flags |= NSO_DEAD;
442 wakeup(nso->nso_wake);
443 }
/* nfs_socket_destroy() may be sleeping for the upcall to finish */
444 if (nso->nso_flags & NSO_DISCONNECTING)
445 wakeup(&nso->nso_flags);
446 lck_mtx_unlock(&nso->nso_lock);
447 }
448
449 /*
450 * Create/initialize an nfs_socket structure.
451 */
/*
 * Create and initialize an nfs_socket for the given address/port/protocol.
 *
 * On success, returns 0 with *nsop pointing at the new nfs_socket (socket
 * created, address copied with the requested port plugged in, lock and —
 * for SOCK_STREAM — RPC record state initialized).  On failure, returns an
 * errno and *nsop is NULL.  If 'resvport' is set, the socket is bound to a
 * reserved (low) local port, since some servers require that.
 */
452 int
453 nfs_socket_create(
454 __unused struct nfsmount *nmp,
455 struct sockaddr *sa,
456 int sotype,
457 in_port_t port,
458 uint32_t protocol,
459 uint32_t vers,
460 int resvport,
461 struct nfs_socket **nsop)
462 {
463 struct nfs_socket *nso;
464 struct timeval now;
465 int error;
466 #ifdef NFS_SOCKET_DEBUGGING
/* debug builds format the peer address up front for the log messages below */
467 char naddr[MAX_IPv6_STR_LEN];
468 void *sinaddr;
469
470 if (sa->sa_family == AF_INET)
471 sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
472 else
473 sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
474 if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
475 strlcpy(naddr, "<unknown>", sizeof(naddr));
476 #endif
477
478 *nsop = NULL;
479
480 /* Create the socket. */
481 MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
482 if (nso)
483 MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
484 if (!nso || !nso->nso_saddr) {
485 if (nso)
486 FREE(nso, M_TEMP);
487 return (ENOMEM);
488 }
489 lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
490 nso->nso_sotype = sotype;
491 if (nso->nso_sotype == SOCK_STREAM)
492 nfs_rpc_record_state_init(&nso->nso_rrs);
493 microuptime(&now);
494 nso->nso_timestamp = now.tv_sec;
/* copy the server address, then overwrite its port with the one requested */
495 bcopy(sa, nso->nso_saddr, sa->sa_len);
496 if (sa->sa_family == AF_INET)
497 ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
498 else if (sa->sa_family == AF_INET6)
499 ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
500 nso->nso_protocol = protocol;
501 nso->nso_version = vers;
502
503 error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);
504
505 /* Some servers require that the client port be a reserved port number. */
506 if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
507 struct sockaddr_storage ss;
508 int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
509 int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
510 int portrange = IP_PORTRANGE_LOW;
511
512 error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
513 if (!error) { /* bind now to check for failure */
/* bind to the wildcard address, port 0 — the kernel picks a low port per IP_PORTRANGE_LOW */
514 ss.ss_len = sa->sa_len;
515 ss.ss_family = sa->sa_family;
516 if (ss.ss_family == AF_INET) {
517 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
518 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
519 } else if (ss.ss_family == AF_INET6) {
520 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
521 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
522 } else {
523 error = EINVAL;
524 }
525 if (!error)
526 error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
527 }
528 }
529
530 if (error) {
/* NOTE(review): if sock_socket() itself failed, nso->nso_so is NULL here and nfs_socket_destroy() will pass it to sock_shutdown/sock_close — confirm those tolerate a NULL socket */
531 NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
532 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
533 resvport ? "r" : "", port, protocol, vers));
534 nfs_socket_destroy(nso);
535 } else {
536 NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
537 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
538 sotype, resvport ? "r" : "", port, protocol, vers));
539 *nsop = nso;
540 }
541 return (error);
542 }
543
544 /*
545 * Destroy an nfs_socket structure.
546 */
/*
 * Tear down an nfs_socket: flag it NSO_DISCONNECTING, give any in-flight
 * upcall up to 4 seconds to notice and finish, then shut down and close
 * the socket and free all associated state.
 */
547 void
548 nfs_socket_destroy(struct nfs_socket *nso)
549 {
550 struct timespec ts = { 4, 0 };
551
552 lck_mtx_lock(&nso->nso_lock);
553 nso->nso_flags |= NSO_DISCONNECTING;
/* the upcall wakes &nso->nso_flags when it sees NSO_DISCONNECTING (see nfs_connect_upcall) */
554 if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
555 msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
556 lck_mtx_unlock(&nso->nso_lock);
/* NOTE(review): nso_so is not checked for NULL before shutdown/close — callers appear to always have a socket, but nfs_socket_create's error path may not; confirm */
557 sock_shutdown(nso->nso_so, SHUT_RDWR);
558 sock_close(nso->nso_so);
559 if (nso->nso_sotype == SOCK_STREAM)
560 nfs_rpc_record_state_cleanup(&nso->nso_rrs);
561 lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
562 if (nso->nso_saddr)
563 FREE(nso->nso_saddr, M_SONAME);
564 if (nso->nso_saddr2)
565 FREE(nso->nso_saddr2, M_SONAME);
566 NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso));
567 FREE(nso, M_TEMP);
568 }
569
570 /*
571 * Set common socket options on an nfs_socket.
572 */
573 void
574 nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
575 {
576 /*
577 * Set socket send/receive timeouts
578 * - Receive timeout shouldn't matter because most receives are performed
579 * in the socket upcall non-blocking.
580 * - Send timeout should allow us to react to a blocked socket.
581 * Soft mounts will want to abort sooner.
582 */
583 struct timeval timeo;
584 int on = 1, proto;
585
586 timeo.tv_usec = 0;
587 timeo.tv_sec = NMFLAG(nmp, SOFT) ? 5 : 60;
588 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
589 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
590 if (nso->nso_sotype == SOCK_STREAM) {
591 /* Assume that SOCK_STREAM always requires a connection */
592 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
593 /* set nodelay for TCP */
594 sock_gettype(nso->nso_so, NULL, NULL, &proto);
595 if (proto == IPPROTO_TCP)
596 sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
597 }
598 if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
599 int reserve = NFS_UDPSOCKBUF;
600 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
601 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
602 }
603 /* set SO_NOADDRERR to detect network changes ASAP */
604 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
605 /* just playin' it safe with upcalls */
606 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
607 /* socket should be interruptible if the mount is */
608 if (!NMFLAG(nmp, INTR))
609 sock_nointerrupt(nso->nso_so, 1);
610 }
611
612 /*
613 * Release resources held in an nfs_socket_search.
614 */
615 void
616 nfs_socket_search_cleanup(struct nfs_socket_search *nss)
617 {
618 struct nfs_socket *nso, *nsonext;
619
620 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
621 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
622 nss->nss_sockcnt--;
623 nfs_socket_destroy(nso);
624 }
625 if (nss->nss_sock) {
626 nfs_socket_destroy(nss->nss_sock);
627 nss->nss_sock = NULL;
628 }
629 }
630
631 /*
632 * Prefer returning certain errors over others.
633 * This function returns a ranking of the given error.
634 */
635 int
636 nfs_connect_error_class(int error)
637 {
638 switch (error) {
639 case 0:
640 return (0);
641 case ETIMEDOUT:
642 case EAGAIN:
643 return (1);
644 case EPIPE:
645 case EADDRNOTAVAIL:
646 case ENETDOWN:
647 case ENETUNREACH:
648 case ENETRESET:
649 case ECONNABORTED:
650 case ECONNRESET:
651 case EISCONN:
652 case ENOTCONN:
653 case ESHUTDOWN:
654 case ECONNREFUSED:
655 case EHOSTDOWN:
656 case EHOSTUNREACH:
657 return (2);
658 case ERPCMISMATCH:
659 case EPROCUNAVAIL:
660 case EPROGMISMATCH:
661 case EPROGUNAVAIL:
662 return (3);
663 case EBADRPC:
664 return (4);
665 default:
666 return (5);
667 }
668 }
669
670 /*
671 * Make sure a socket search returns the best error.
672 */
673 void
674 nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
675 {
676 if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
677 nss->nss_error = error;
678 }
679
680 /*
681 * Continue the socket search until we have something to report.
682 */
/*
 * Drive the socket search: start new candidate sockets (several in
 * parallel if servers are slow), push each one through connect and a
 * NULL RPC "ping", reap dead/timed-out candidates, and loop until either
 * nss->nss_sock holds a verified socket or there is nothing left to try.
 * Returns 0 in either of those cases; EINTR on unmount; or a signal error
 * from nfs_sigintr().
 */
683 int
684 nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
685 {
686 struct nfs_socket *nso, *nsonext;
687 struct timeval now;
688 struct nfs_fs_location *fsl;
689 struct nfs_fs_server *fss;
690 struct sockaddr_storage ss;
691 char *addrstr;
692 int error, nomore = 0;
693
694 loop:
695 microuptime(&now);
696 NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec));
697
698 /* Time to start another socket? */
/* NOTE(review): nss_last appears to encode "when we last started a socket"; negative values request an immediate start (-2 is set below after skips/reaps) — confirm against nfs_connect()'s initialization */
699 while ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
700 ((nss->nss_sockcnt < 4) && (now.tv_sec >= (nss->nss_last + 2)))) {
701 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
702 return (EINTR);
703 /* Find the next address to try... */
704 /* Have we run out of locations? */
/* wrapped back around to the starting index: every candidate has been tried */
705 if (!nomore && (nss->nss_last != -1) && !nfs_location_index_cmp(&nss->nss_nextloc, &nss->nss_startloc))
706 nomore = 1;
707 if (nomore) {
708 if (nss->nss_last < 0)
709 nss->nss_last = now.tv_sec;
710 break;
711 }
712 /* Can we convert the address to a sockaddr? */
713 fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
714 fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
715 addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
716 if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
717 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
718 nss->nss_last = -2;
719 continue;
720 }
721 /* Check that socket family is acceptable. */
722 if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
723 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
724 nss->nss_last = -2;
725 continue;
726 }
727
728 /* Create the socket. */
729 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
730 nss->nss_port, nss->nss_protocol, nss->nss_version,
731 ((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
732 if (error)
733 return (error);
734
735 nso->nso_location = nss->nss_nextloc;
736 nso->nso_wake = nss;
737 error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
738 if (error) {
/* couldn't install the upcall: queue it anyway as dead so the reap pass below records the error */
739 lck_mtx_lock(&nso->nso_lock);
740 nso->nso_error = error;
741 nso->nso_flags |= NSO_DEAD;
742 lck_mtx_unlock(&nso->nso_lock);
743 }
744
745 TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
746 nss->nss_sockcnt++;
747 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
748
749 nss->nss_last = now.tv_sec;
750 }
751
752 /* check each active socket and try to push it along */
753 TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
754 lck_mtx_lock(&nso->nso_lock);
755 if (!(nso->nso_flags & NSO_CONNECTED)) {
756 if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
757 /* no connection needed, just say it's already connected */
758 nso->nso_flags |= NSO_CONNECTED;
759 NFS_SOCK_DBG(("nfs connect %s UDP socket %p noconnect\n",
760 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
761 } else if (!(nso->nso_flags & NSO_CONNECTING)) {
762 /* initiate the connection */
763 nso->nso_flags |= NSO_CONNECTING;
764 lck_mtx_unlock(&nso->nso_lock);
765 NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n",
766 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
/* non-blocking connect; completion is detected on a later pass */
767 error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
768 lck_mtx_lock(&nso->nso_lock);
769 if (error && (error != EINPROGRESS)) {
770 nso->nso_error = error;
771 nso->nso_flags |= NSO_DEAD;
772 lck_mtx_unlock(&nso->nso_lock);
773 continue;
774 }
775 }
776 if (nso->nso_flags & NSO_CONNECTING) {
777 /* check the connection */
778 if (sock_isconnected(nso->nso_so)) {
779 NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n",
780 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
781 nso->nso_flags &= ~NSO_CONNECTING;
782 nso->nso_flags |= NSO_CONNECTED;
783 } else {
/* not connected yet — see if the socket has recorded an async connect error */
784 int optlen = sizeof(error);
785 error = 0;
786 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
787 if (error) { /* we got an error on the socket */
788 NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n",
789 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
790 if (nss->nss_flags & NSS_VERBOSE)
791 log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
792 error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
793 nso->nso_error = error;
794 nso->nso_flags |= NSO_DEAD;
795 lck_mtx_unlock(&nso->nso_lock);
796 continue;
797 }
798 }
799 }
800 if (nso->nso_flags & NSO_CONNECTED)
801 nfs_socket_options(nmp, nso);
802 }
803 if (!(nso->nso_flags & NSO_CONNECTED)) {
804 lck_mtx_unlock(&nso->nso_lock);
805 continue;
806 }
/* connected but not yet verified: send a NULL RPC ping (re-ping UDP every 2s since it may be lost) */
807 if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
808 ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
809 /* initiate a NULL RPC request */
810 uint64_t xid = nso->nso_pingxid;
811 mbuf_t m, mreq = NULL;
812 struct msghdr msg;
813 size_t reqlen, sentlen;
814 uint32_t vers;
815
/* no version pinned yet: ping with a sensible default for the protocol */
816 if (!(vers = nso->nso_version)) {
817 if (nso->nso_protocol == PMAPPROG)
818 vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
819 else if (nso->nso_protocol == NFS_PROG)
820 vers = NFS_VER3;
821 }
822 lck_mtx_unlock(&nso->nso_lock);
823 error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
824 vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
825 lck_mtx_lock(&nso->nso_lock);
826 if (!error) {
827 nso->nso_flags |= NSO_PINGING;
828 nso->nso_pingxid = R_XID32(xid);
829 nso->nso_reqtimestamp = now.tv_sec;
830 bzero(&msg, sizeof(msg));
/* unconnected datagram sockets need an explicit destination address */
831 if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
832 msg.msg_name = nso->nso_saddr;
833 msg.msg_namelen = nso->nso_saddr->sa_len;
834 }
835 for (reqlen=0, m=mreq; m; m = mbuf_next(m))
836 reqlen += mbuf_len(m);
837 lck_mtx_unlock(&nso->nso_lock);
838 error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
839 NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n",
840 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
841 lck_mtx_lock(&nso->nso_lock);
/* a short send counts as a failure */
842 if (!error && (sentlen != reqlen))
843 error = ETIMEDOUT;
844 }
845 if (error) {
846 nso->nso_error = error;
847 nso->nso_flags |= NSO_DEAD;
848 lck_mtx_unlock(&nso->nso_lock);
849 continue;
850 }
851 }
852 if (nso->nso_flags & NSO_VERIFIED) {
853 /* WOOHOO!! This socket looks good! */
854 NFS_SOCK_DBG(("nfs connect %s socket %p verified\n",
855 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
856 if (!nso->nso_version) {
857 /* If the version isn't set, the default must have worked. */
858 if (nso->nso_protocol == PMAPPROG)
859 nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
860 if (nso->nso_protocol == NFS_PROG)
861 nso->nso_version = NFS_VER3;
862 }
863 lck_mtx_unlock(&nso->nso_lock);
/* claim the winner and stop scanning */
864 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
865 nss->nss_sockcnt--;
866 nss->nss_sock = nso;
867 break;
868 }
869 lck_mtx_unlock(&nso->nso_lock);
870 }
871
/* reap pass: time out stragglers and discard dead sockets, recording their errors */
872 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
873 lck_mtx_lock(&nso->nso_lock);
874 if (now.tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
875 /* took too long */
876 NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n",
877 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
878 nso->nso_error = ETIMEDOUT;
879 nso->nso_flags |= NSO_DEAD;
880 }
881 if (!(nso->nso_flags & NSO_DEAD)) {
882 lck_mtx_unlock(&nso->nso_lock);
883 continue;
884 }
885 lck_mtx_unlock(&nso->nso_lock);
886 NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n",
887 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error));
888 nfs_socket_search_update_error(nss, nso->nso_error);
889 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
890 nss->nss_sockcnt--;
891 nfs_socket_destroy(nso);
/* a socket died: start a replacement immediately on the next pass */
892 if (!nomore)
893 nss->nss_last = -2;
894 }
895
896 /*
897 * Keep looping if we haven't found a socket yet and we have more
898 * sockets to (continue to) try.
899 */
900 error = 0;
901 if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || !nomore)) {
902 /* log a warning if connect is taking a while */
903 if (((now.tv_sec - nss->nss_timestamp) >= 30) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
904 log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
905 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
906 nss->nss_flags |= NSS_WARNED;
907 }
908 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
909 return (EINTR);
910 if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
911 return (error);
/* nothing urgent pending: nap up to a second (upcalls wake us via nso_wake) before rescanning */
912 if (nss->nss_last >= 0)
913 tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
914 goto loop;
915 }
916
917 NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
918 return (error);
919 }
920
921 /*
922 * Initialize a new NFS connection.
923 *
924 * Search for a location to connect a socket to and initialize the connection.
925 *
926 * An NFS mount may have multiple locations/servers/addresses available.
927 * We attempt to connect to each one asynchronously and will start
928 * several sockets in parallel if other locations are slow to answer.
929 * We'll use the first NFS socket we can successfully set up.
930 *
931 * The search may involve contacting the portmapper service first.
932 *
933 * A mount's initial connection may require negotiating some parameters such
934 * as socket type and NFS version.
935 */
936 int
937 nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
938 {
939 struct nfs_socket_search nss;
940 struct nfs_socket *nso, *nsonfs;
941 struct sockaddr_storage ss;
942 struct sockaddr *saddr, *oldsaddr;
943 sock_upcall upcall;
944 struct timeval now, start;
945 int error, savederror, nfsvers;
946 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
947 fhandle_t *fh = NULL;
948 char *path = NULL;
949 in_port_t port;
950
951 /* paranoia... check that we have at least one address in the locations */
952 uint32_t loc, serv;
953 for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) {
954 for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) {
955 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount)
956 break;
957 NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n",
958 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
959 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
960 }
961 if (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount)
962 break;
963 }
964 if (loc >= nmp->nm_locations.nl_numlocs) {
965 NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n",
966 vfs_statfs(nmp->nm_mountp)->f_mntfromname));
967 return (EINVAL);
968 }
969
970 lck_mtx_lock(&nmp->nm_lock);
971 nmp->nm_sockflags |= NMSOCK_CONNECTING;
972 nmp->nm_nss = &nss;
973 lck_mtx_unlock(&nmp->nm_lock);
974 microuptime(&start);
975 savederror = error = 0;
976
977 tryagain:
978 /* initialize socket search state */
979 bzero(&nss, sizeof(nss));
980 nss.nss_error = savederror;
981 TAILQ_INIT(&nss.nss_socklist);
982 nss.nss_sotype = sotype;
983 nss.nss_startloc = nmp->nm_locations.nl_current;
984 nss.nss_timestamp = start.tv_sec;
985 nss.nss_timeo = timeo;
986 if (verbose)
987 nss.nss_flags |= NSS_VERBOSE;
988
989 /* First time connecting, we may need to negotiate some things */
990 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
991 if (!nmp->nm_vers) {
992 /* No NFS version specified... */
993 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
994 /* ...connect to portmapper first if we (may) need any ports. */
995 nss.nss_port = PMAPPORT;
996 nss.nss_protocol = PMAPPROG;
997 nss.nss_version = 0;
998 } else {
999 /* ...connect to NFS port first. */
1000 nss.nss_port = nmp->nm_nfsport;
1001 nss.nss_protocol = NFS_PROG;
1002 nss.nss_version = 0;
1003 }
1004 } else if (nmp->nm_vers >= NFS_VER4) {
1005 /* For NFSv4, we use the given (or default) port. */
1006 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
1007 nss.nss_protocol = NFS_PROG;
1008 nss.nss_version = 4;
1009 } else {
1010 /* For NFSv3/v2... */
1011 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
1012 /* ...connect to portmapper first if we need any ports. */
1013 nss.nss_port = PMAPPORT;
1014 nss.nss_protocol = PMAPPROG;
1015 nss.nss_version = 0;
1016 } else {
1017 /* ...connect to NFS port first. */
1018 nss.nss_port = nmp->nm_nfsport;
1019 nss.nss_protocol = NFS_PROG;
1020 nss.nss_version = nmp->nm_vers;
1021 }
1022 }
1023 NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n",
1024 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1025 nss.nss_protocol, nss.nss_version));
1026 } else {
1027 /* we've connected before, just connect to NFS port */
1028 if (!nmp->nm_nfsport) {
1029 /* need to ask portmapper which port that would be */
1030 nss.nss_port = PMAPPORT;
1031 nss.nss_protocol = PMAPPROG;
1032 nss.nss_version = 0;
1033 } else {
1034 nss.nss_port = nmp->nm_nfsport;
1035 nss.nss_protocol = NFS_PROG;
1036 nss.nss_version = nmp->nm_vers;
1037 }
1038 NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n",
1039 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
1040 nss.nss_protocol, nss.nss_version));
1041 }
1042
1043 /* Set next location to first valid location. */
1044 /* If start location is invalid, find next location. */
1045 nss.nss_nextloc = nss.nss_startloc;
1046 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) ||
1047 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) {
1048 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc);
1049 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) {
1050 NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n",
1051 vfs_statfs(nmp->nm_mountp)->f_mntfromname));
1052 return (ENOENT);
1053 }
1054 }
1055 nss.nss_last = -1;
1056
1057 keepsearching:
1058
1059 error = nfs_connect_search_loop(nmp, &nss);
1060 if (error || !nss.nss_sock) {
1061 /* search failed */
1062 nfs_socket_search_cleanup(&nss);
1063 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
1064 /* Try using UDP */
1065 sotype = SOCK_DGRAM;
1066 savederror = nss.nss_error;
1067 NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n",
1068 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error));
1069 goto tryagain;
1070 }
1071 if (!error)
1072 error = nss.nss_error ? nss.nss_error : ETIMEDOUT;
1073 lck_mtx_lock(&nmp->nm_lock);
1074 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1075 nmp->nm_nss = NULL;
1076 lck_mtx_unlock(&nmp->nm_lock);
1077 if (nss.nss_flags & NSS_WARNED)
1078 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n",
1079 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1080 if (fh)
1081 FREE(fh, M_TEMP);
1082 if (path)
1083 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1084 NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n",
1085 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
1086 return (error);
1087 }
1088
1089 /* try to use nss_sock */
1090 nso = nss.nss_sock;
1091 nss.nss_sock = NULL;
1092
1093 /* We may be speaking to portmap first... to determine port(s). */
1094 if (nso->nso_saddr->sa_family == AF_INET)
1095 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
1096 else
1097 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
1098 if (port == PMAPPORT) {
1099 /* Use this portmapper port to get the port #s we need. */
1100 NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n",
1101 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
1102
1103 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */
1104 sock_setupcall(nso->nso_so, NULL, NULL);
1105
1106 /* Set up socket address and port for NFS socket. */
1107 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1108
1109 /* If NFS version not set, try NFSv3 then NFSv2. */
1110 nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3;
1111
1112 if (!(port = nmp->nm_nfsport)) {
1113 if (ss.ss_family == AF_INET)
1114 ((struct sockaddr_in*)&ss)->sin_port = htons(0);
1115 else if (ss.ss_family == AF_INET6)
1116 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
1117 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1118 nso->nso_so, NFS_PROG, nfsvers,
1119 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
1120 if (!error) {
1121 if (ss.ss_family == AF_INET)
1122 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1123 else if (ss.ss_family == AF_INET6)
1124 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1125 if (!port)
1126 error = EPROGUNAVAIL;
1127 }
1128 if (error && !nmp->nm_vers) {
1129 nfsvers = NFS_VER2;
1130 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1131 nso->nso_so, NFS_PROG, nfsvers,
1132 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
1133 if (!error) {
1134 if (ss.ss_family == AF_INET)
1135 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1136 else if (ss.ss_family == AF_INET6)
1137 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1138 if (!port)
1139 error = EPROGUNAVAIL;
1140 }
1141 }
1142 if (error) {
1143 nfs_socket_search_update_error(&nss, error);
1144 nfs_socket_destroy(nso);
1145 goto keepsearching;
1146 }
1147 }
1148 /* Create NFS protocol socket and add it to the list of sockets. */
1149 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
1150 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
1151 if (error) {
1152 nfs_socket_search_update_error(&nss, error);
1153 nfs_socket_destroy(nso);
1154 goto keepsearching;
1155 }
1156 nsonfs->nso_location = nso->nso_location;
1157 nsonfs->nso_wake = &nss;
1158 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs);
1159 if (error) {
1160 nfs_socket_search_update_error(&nss, error);
1161 nfs_socket_destroy(nsonfs);
1162 nfs_socket_destroy(nso);
1163 goto keepsearching;
1164 }
1165 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link);
1166 nss.nss_sockcnt++;
1167 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1168 /* Set up socket address and port for MOUNT socket. */
1169 error = 0;
1170 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1171 port = nmp->nm_mountport;
1172 if (ss.ss_family == AF_INET)
1173 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1174 else if (ss.ss_family == AF_INET6)
1175 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1176 if (!port) {
1177 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1178 /* If NFS version is unknown, optimistically choose for NFSv3. */
1179 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1180 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
1181 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1182 nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo);
1183 }
1184 if (!error) {
1185 if (ss.ss_family == AF_INET)
1186 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1187 else if (ss.ss_family == AF_INET6)
1188 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1189 if (!port)
1190 error = EPROGUNAVAIL;
1191 }
1192 /* create sockaddr for MOUNT */
1193 if (!error)
1194 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO);
1195 if (!error && !nsonfs->nso_saddr2)
1196 error = ENOMEM;
1197 if (!error)
1198 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len);
1199 if (error) {
1200 lck_mtx_lock(&nsonfs->nso_lock);
1201 nsonfs->nso_error = error;
1202 nsonfs->nso_flags |= NSO_DEAD;
1203 lck_mtx_unlock(&nsonfs->nso_lock);
1204 }
1205 }
1206 nfs_socket_destroy(nso);
1207 goto keepsearching;
1208 }
1209
1210 /* nso is an NFS socket */
1211 NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
1212
1213 /* If NFS version wasn't specified, it was determined during the connect. */
1214 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version;
1215
1216 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
1217 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
1218 error = 0;
1219 saddr = nso->nso_saddr2;
1220 if (!saddr) {
1221 /* Need sockaddr for MOUNT port */
1222 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
1223 port = nmp->nm_mountport;
1224 if (ss.ss_family == AF_INET)
1225 ((struct sockaddr_in*)&ss)->sin_port = htons(port);
1226 else if (ss.ss_family == AF_INET6)
1227 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
1228 if (!port) {
1229 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */
1230 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
1231 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
1232 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
1233 NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
1234 if (ss.ss_family == AF_INET)
1235 port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
1236 else if (ss.ss_family == AF_INET6)
1237 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
1238 }
1239 if (!error) {
1240 if (port)
1241 saddr = (struct sockaddr*)&ss;
1242 else
1243 error = EPROGUNAVAIL;
1244 }
1245 }
1246 if (saddr)
1247 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
1248 if (saddr && fh)
1249 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1250 if (!saddr || !fh || !path) {
1251 if (!error)
1252 error = ENOMEM;
1253 if (fh)
1254 FREE(fh, M_TEMP);
1255 if (path)
1256 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1257 fh = NULL;
1258 path = NULL;
1259 nfs_socket_search_update_error(&nss, error);
1260 nfs_socket_destroy(nso);
1261 goto keepsearching;
1262 }
1263 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1);
1264 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers,
1265 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec);
1266 NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n",
1267 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
1268 if (!error) {
1269 /* Make sure we can agree on a security flavor. */
1270 int o, s; /* indices into mount option and server security flavor lists */
1271 int found = 0;
1272
1273 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) {
1274 /* Some servers return an empty list to indicate RPCAUTH_SYS? */
1275 nmp->nm_servsec.count = 1;
1276 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS;
1277 }
1278 if (nmp->nm_sec.count) {
1279 /* Choose the first flavor in our list that the server supports. */
1280 if (!nmp->nm_servsec.count) {
1281 /* we don't know what the server supports, just use our first choice */
1282 nmp->nm_auth = nmp->nm_sec.flavors[0];
1283 found = 1;
1284 }
1285 for (o=0; !found && (o < nmp->nm_sec.count); o++)
1286 for (s=0; !found && (s < nmp->nm_servsec.count); s++)
1287 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) {
1288 nmp->nm_auth = nmp->nm_sec.flavors[o];
1289 found = 1;
1290 }
1291 } else {
1292 /* Choose the first one we support from the server's list. */
1293 if (!nmp->nm_servsec.count) {
1294 nmp->nm_auth = RPCAUTH_SYS;
1295 found = 1;
1296 }
1297 for (s=0; s < nmp->nm_servsec.count; s++)
1298 switch (nmp->nm_servsec.flavors[s]) {
1299 case RPCAUTH_SYS:
1300 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */
1301 if (found && (nmp->nm_auth == RPCAUTH_NONE))
1302 found = 0;
1303 case RPCAUTH_NONE:
1304 case RPCAUTH_KRB5:
1305 case RPCAUTH_KRB5I:
1306 case RPCAUTH_KRB5P:
1307 if (!found) {
1308 nmp->nm_auth = nmp->nm_servsec.flavors[s];
1309 found = 1;
1310 }
1311 break;
1312 }
1313 }
1314 error = !found ? EAUTH : 0;
1315 }
1316 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1317 path = NULL;
1318 if (error) {
1319 nfs_socket_search_update_error(&nss, error);
1320 FREE(fh, M_TEMP);
1321 fh = NULL;
1322 nfs_socket_destroy(nso);
1323 goto keepsearching;
1324 }
1325 if (nmp->nm_fh)
1326 FREE(nmp->nm_fh, M_TEMP);
1327 nmp->nm_fh = fh;
1328 fh = NULL;
1329 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
1330 }
1331
1332 /* put the real upcall in place */
1333 upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
1334 error = sock_setupcall(nso->nso_so, upcall, nmp);
1335 if (error) {
1336 nfs_socket_search_update_error(&nss, error);
1337 nfs_socket_destroy(nso);
1338 goto keepsearching;
1339 }
1340
1341 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1342 /* set mntfromname to this location */
1343 if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
1344 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location,
1345 vfs_statfs(nmp->nm_mountp)->f_mntfromname,
1346 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0);
1347 /* some negotiated values need to remain unchanged for the life of the mount */
1348 if (!nmp->nm_sotype)
1349 nmp->nm_sotype = nso->nso_sotype;
1350 if (!nmp->nm_vers) {
1351 nmp->nm_vers = nfsvers;
1352 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
1353 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
1354 if (nso->nso_saddr->sa_family == AF_INET)
1355 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
1356 else if (nso->nso_saddr->sa_family == AF_INET6)
1357 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
1358 else
1359 port = 0;
1360 if (port == NFS_PORT)
1361 nmp->nm_nfsport = NFS_PORT;
1362 }
1363 }
1364 /* do some version-specific pre-mount set up */
1365 if (nmp->nm_vers >= NFS_VER4) {
1366 microtime(&now);
1367 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
1368 if (!NMFLAG(nmp, NOCALLBACK))
1369 nfs4_mount_callback_setup(nmp);
1370 }
1371 }
1372
1373 /* Initialize NFS socket state variables */
1374 lck_mtx_lock(&nmp->nm_lock);
1375 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
1376 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
1377 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
1378 nmp->nm_sdrtt[3] = 0;
1379 if (nso->nso_sotype == SOCK_DGRAM) {
1380 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
1381 nmp->nm_sent = 0;
1382 } else if (nso->nso_sotype == SOCK_STREAM) {
1383 nmp->nm_timeouts = 0;
1384 }
1385 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
1386 nmp->nm_sockflags |= NMSOCK_SETUP;
1387 /* move the socket to the mount structure */
1388 nmp->nm_nso = nso;
1389 oldsaddr = nmp->nm_saddr;
1390 nmp->nm_saddr = nso->nso_saddr;
1391 lck_mtx_unlock(&nmp->nm_lock);
1392 error = nfs_connect_setup(nmp);
1393 lck_mtx_lock(&nmp->nm_lock);
1394 nmp->nm_sockflags &= ~NMSOCK_SETUP;
1395 if (!error) {
1396 nmp->nm_sockflags |= NMSOCK_READY;
1397 wakeup(&nmp->nm_sockflags);
1398 }
1399 if (error) {
1400 NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n",
1401 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
1402 nfs_socket_search_update_error(&nss, error);
1403 nmp->nm_saddr = oldsaddr;
1404 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1405 /* undo settings made prior to setup */
1406 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE))
1407 nmp->nm_sotype = 0;
1408 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) {
1409 if (nmp->nm_vers >= NFS_VER4) {
1410 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT))
1411 nmp->nm_nfsport = 0;
1412 if (nmp->nm_cbid)
1413 nfs4_mount_callback_shutdown(nmp);
1414 if (IS_VALID_CRED(nmp->nm_mcred))
1415 kauth_cred_unref(&nmp->nm_mcred);
1416 bzero(&nmp->nm_un, sizeof(nmp->nm_un));
1417 }
1418 nmp->nm_vers = 0;
1419 }
1420 }
1421 lck_mtx_unlock(&nmp->nm_lock);
1422 nmp->nm_nso = NULL;
1423 nfs_socket_destroy(nso);
1424 goto keepsearching;
1425 }
1426
1427 /* update current location */
1428 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) &&
1429 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) {
1430 /* server has changed, we should initiate failover/recovery */
1431 // XXX
1432 }
1433 nmp->nm_locations.nl_current = nso->nso_location;
1434 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID;
1435
1436 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
1437 /* We have now successfully connected... make a note of it. */
1438 nmp->nm_sockflags |= NMSOCK_HASCONNECTED;
1439 }
1440
1441 lck_mtx_unlock(&nmp->nm_lock);
1442 if (oldsaddr)
1443 FREE(oldsaddr, M_SONAME);
1444
1445 if (nss.nss_flags & NSS_WARNED)
1446 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n",
1447 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1448
1449 nmp->nm_nss = NULL;
1450 nfs_socket_search_cleanup(&nss);
1451 if (fh)
1452 FREE(fh, M_TEMP);
1453 if (path)
1454 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
1455 NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
1456 return (0);
1457 }
1458
1459
1460 /* setup & confirm socket connection is functional */
1461 int
1462 nfs_connect_setup(struct nfsmount *nmp)
1463 {
1464 int error = 0;
1465
1466 if (nmp->nm_vers >= NFS_VER4) {
1467 if (nmp->nm_state & NFSSTA_CLIENTID) {
1468 /* first, try to renew our current state */
1469 error = nfs4_renew(nmp, R_SETUP);
1470 if ((error == NFSERR_ADMIN_REVOKED) ||
1471 (error == NFSERR_CB_PATH_DOWN) ||
1472 (error == NFSERR_EXPIRED) ||
1473 (error == NFSERR_LEASE_MOVED) ||
1474 (error == NFSERR_STALE_CLIENTID)) {
1475 lck_mtx_lock(&nmp->nm_lock);
1476 nfs_need_recover(nmp, error);
1477 lck_mtx_unlock(&nmp->nm_lock);
1478 }
1479 }
1480 error = nfs4_setclientid(nmp);
1481 }
1482 return (error);
1483 }
1484
1485 /*
1486 * NFS socket reconnect routine:
1487 * Called when a connection is broken.
1488 * - disconnect the old socket
1489 * - nfs_connect() again
1490 * - set R_MUSTRESEND for all outstanding requests on mount point
1491 * If this fails the mount point is DEAD!
1492 */
int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, wentdown = 0, verbose = 1;
	time_t lastmsg;

	/*
	 * Bias lastmsg so the first "can not connect" message appears after
	 * nm_tprintf_initial_delay seconds rather than waiting the full
	 * nm_tprintf_delay interval.
	 */
	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	/* Keep retrying the connect (30s timeout per attempt) until it succeeds. */
	while ((error = nfs_connect(nmp, verbose, 30))) {
		/* only be verbose on the first attempt */
		verbose = 0;
		nfs_disconnect(nmp);
		/* interrupted or mount marked dead: give up immediately */
		if ((error == EINTR) || (error == ERESTART))
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		/* periodically log that the server is unresponsive */
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		/* may mark the mount dead if the dead timeout has been reached */
		nfs_mount_check_dead_timeout(nmp);
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		/* wait about a second (until the next lbolt tick) before retrying */
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	/* we're reconnected; clear the "server down" condition if we raised it */
	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			/* skip requests that already failed, completed, or are flagged */
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				/* hand async requests (not mid-send) to the resend queue */
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}
1563
1564 /*
1565 * NFS disconnect. Clean up and unlink.
1566 */
1567 void
1568 nfs_disconnect(struct nfsmount *nmp)
1569 {
1570 struct nfs_socket *nso;
1571
1572 lck_mtx_lock(&nmp->nm_lock);
1573 tryagain:
1574 if (nmp->nm_nso) {
1575 struct timespec ts = { 1, 0 };
1576 if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
1577 nmp->nm_state |= NFSSTA_WANTSND;
1578 msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
1579 goto tryagain;
1580 }
1581 if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
1582 msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
1583 goto tryagain;
1584 }
1585 nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
1586 nmp->nm_sockflags &= ~NMSOCK_READY;
1587 nso = nmp->nm_nso;
1588 nmp->nm_nso = NULL;
1589 if (nso->nso_saddr == nmp->nm_saddr)
1590 nso->nso_saddr = NULL;
1591 lck_mtx_unlock(&nmp->nm_lock);
1592 nfs_socket_destroy(nso);
1593 lck_mtx_lock(&nmp->nm_lock);
1594 nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
1595 lck_mtx_unlock(&nmp->nm_lock);
1596 } else {
1597 lck_mtx_unlock(&nmp->nm_lock);
1598 }
1599 }
1600
1601 /*
1602 * mark an NFS mount as needing a reconnect/resends.
1603 */
1604 void
1605 nfs_need_reconnect(struct nfsmount *nmp)
1606 {
1607 struct nfsreq *rq;
1608
1609 lck_mtx_lock(&nmp->nm_lock);
1610 nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
1611 lck_mtx_unlock(&nmp->nm_lock);
1612
1613 /*
1614 * Loop through outstanding request list and
1615 * mark all requests as needing a resend.
1616 */
1617 lck_mtx_lock(nfs_request_mutex);
1618 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
1619 if (rq->r_nmp == nmp) {
1620 lck_mtx_lock(&rq->r_mtx);
1621 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
1622 rq->r_flags |= R_MUSTRESEND;
1623 rq->r_rtt = -1;
1624 wakeup(rq);
1625 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
1626 nfs_asyncio_resend(rq);
1627 }
1628 lck_mtx_unlock(&rq->r_mtx);
1629 }
1630 }
1631 lck_mtx_unlock(nfs_request_mutex);
1632 }
1633
1634
1635 /*
1636 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
1637 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish;
	nfsnode_t np;

	lck_mtx_lock(&nmp->nm_lock);

	/*
	 * Main service loop: keep running while there is work pending --
	 * a dropped connection, queued resends, monitored nodes, a running
	 * dead timeout, recovery needed, or (v4) queued delegation returns.
	 * nm_lock is held at the top of each iteration and is dropped around
	 * every blocking operation below.
	 */
	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
	       !LIST_EMPTY(&nmp->nm_monlist) ||
	       nmp->nm_deadto_start ||
	       (nmp->nm_state & NFSSTA_RECOVER) ||
	       ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
	{
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) {
			/* note when this reconnect effort started (for the dead timeout) */
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if (nfs_reconnect(nmp) == 0)
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* perform state recovery if flagged and the socket is usable */
		if ((nmp->nm_sockflags & NMSOCK_READY) &&
		    (nmp->nm_state & NFSSTA_RECOVER) &&
		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
		    !(nmp->nm_state & NFSSTA_FORCE)) {
			/* perform state recovery */
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_recover(nmp);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* handle NFSv4 delegation returns */
		while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) &&
		       (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
		       ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
			lck_mtx_unlock(&nmp->nm_lock);
			nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) &&
		       ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			/* skip requests whose scheduled resend time hasn't arrived yet */
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			/* take the request off the resend queue before working on it */
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			/* request already failed or got a reply: just wake waiters */
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			/* restarted or GSS requests must be rebuilt before resending */
			if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (nfs_request_using_gss(req)) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					/* a zero XID forces a new one to be assigned */
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				/* only finish async requests here if the send failed */
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			/* plain resend: just push the existing request back out */
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				req->r_flags |= R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					/* send succeeded; wake anyone waiting on the request */
					if (req->r_flags & R_RESENDQ)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			/* send failed: record the error and finish async requests */
			req->r_error = error;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* see if the unresponsive mount has hit its dead timeout */
		if (nmp->nm_deadto_start)
			nfs_mount_check_dead_timeout(nmp);
		if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))
			break;
		/* check monitored nodes, if necessary/possible */
		if (!LIST_EMPTY(&nmp->nm_monlist)) {
			nmp->nm_state |= NFSSTA_MONITOR_SCAN;
			LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
				if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
					break;
				/* mark the node so it isn't removed while we're using it unlocked */
				np->n_mflag |= NMMONSCANINPROG;
				lck_mtx_unlock(&nmp->nm_lock);
				error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR));
				if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */
					nfs_data_update_size(np, 0);
				lck_mtx_lock(&nmp->nm_lock);
				np->n_mflag &= ~NMMONSCANINPROG;
				if (np->n_mflag & NMMONSCANWANT) {
					np->n_mflag &= ~NMMONSCANWANT;
					wakeup(&np->n_mflag);
				}
				if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
					break;
			}
			nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
			if (nmp->nm_state & NFSSTA_UNMOUNTING)
				wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
		}
		/* sleep until poked; poll faster when there's pending work */
		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) {
			if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
			    (nmp->nm_state & NFSSTA_RECOVER))
				ts.tv_sec = 1;
			else
				ts.tv_sec = 5;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	/* If we're unmounting, send the unmount RPC, if requested/appropriate. */
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
	    (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
	    (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
		lck_mtx_unlock(&nmp->nm_lock);
		/* shorter timeout if the socket isn't even connected */
		nfs3_umount_rpc(nmp, vfs_context_kernel(),
			(nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* thread is exiting: clear nm_sockthd and wake anyone waiting for us */
	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
1831
1832 /* start or wake a mount's socket thread */
1833 void
1834 nfs_mount_sock_thread_wake(struct nfsmount *nmp)
1835 {
1836 if (nmp->nm_sockthd)
1837 wakeup(&nmp->nm_sockthd);
1838 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
1839 thread_deallocate(nmp->nm_sockthd);
1840 }
1841
1842 /*
1843 * Check if we should mark the mount dead because the
1844 * unresponsive mount has reached the dead timeout.
1845 * (must be called with nmp locked)
1846 */
1847 void
1848 nfs_mount_check_dead_timeout(struct nfsmount *nmp)
1849 {
1850 struct timeval now;
1851
1852 if (nmp->nm_deadtimeout <= 0)
1853 return;
1854 if (nmp->nm_deadto_start == 0)
1855 return;
1856 if (nmp->nm_state & NFSSTA_DEAD)
1857 return;
1858 microuptime(&now);
1859 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_deadtimeout)
1860 return;
1861 printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1862 nmp->nm_state |= NFSSTA_DEAD;
1863 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
1864 }
1865
1866 /*
1867 * NFS callback channel socket state
1868 */
1869 struct nfs_callback_socket
1870 {
1871 TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
1872 socket_t ncbs_so; /* the socket */
1873 struct sockaddr_storage ncbs_saddr; /* socket address */
1874 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */
1875 time_t ncbs_stamp; /* last accessed at */
1876 uint32_t ncbs_flags; /* see below */
1877 };
1878 #define NCBSOCK_UPCALL 0x0001
1879 #define NCBSOCK_UPCALLWANT 0x0002
1880 #define NCBSOCK_DEAD 0x0004
1881
1882 /*
1883 * NFS callback channel state
1884 *
1885 * One listening socket for accepting socket connections from servers and
1886 * a list of connected sockets to handle callback requests on.
1887 * Mounts registered with the callback channel are assigned IDs and
1888 * put on a list so that the callback request handling code can match
1889 * the requests up with mounts.
1890 */
1891 socket_t nfs4_cb_so = NULL;
1892 socket_t nfs4_cb_so6 = NULL;
1893 in_port_t nfs4_cb_port = 0;
1894 in_port_t nfs4_cb_port6 = 0;
1895 uint32_t nfs4_cb_id = 0;
1896 uint32_t nfs4_cb_so_usecount = 0;
1897 TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
1898 TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;
1899
1900 int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
1901
1902 /*
1903 * Set up the callback channel for the NFS mount.
1904 *
1905 * Initializes the callback channel socket state and
1906 * assigns a callback ID to the mount.
1907 */
1908 void
1909 nfs4_mount_callback_setup(struct nfsmount *nmp)
1910 {
1911 struct sockaddr_in sin;
1912 struct sockaddr_in6 sin6;
1913 socket_t so = NULL;
1914 socket_t so6 = NULL;
1915 struct timeval timeo;
1916 int error, on = 1;
1917 in_port_t port;
1918
1919 lck_mtx_lock(nfs_global_mutex);
1920 if (nfs4_cb_id == 0) {
1921 TAILQ_INIT(&nfs4_cb_mounts);
1922 TAILQ_INIT(&nfs4_cb_socks);
1923 nfs4_cb_id++;
1924 }
1925 nmp->nm_cbid = nfs4_cb_id++;
1926 if (nmp->nm_cbid == 0)
1927 nmp->nm_cbid = nfs4_cb_id++;
1928 nfs4_cb_so_usecount++;
1929 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
1930
1931 if (nfs4_cb_so) {
1932 lck_mtx_unlock(nfs_global_mutex);
1933 return;
1934 }
1935
1936 /* IPv4 */
1937 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
1938 if (error) {
1939 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
1940 goto fail;
1941 }
1942 so = nfs4_cb_so;
1943
1944 sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
1945 sin.sin_len = sizeof(struct sockaddr_in);
1946 sin.sin_family = AF_INET;
1947 sin.sin_addr.s_addr = htonl(INADDR_ANY);
1948 sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
1949 error = sock_bind(so, (struct sockaddr *)&sin);
1950 if (error) {
1951 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
1952 goto fail;
1953 }
1954 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
1955 if (error) {
1956 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
1957 goto fail;
1958 }
1959 nfs4_cb_port = ntohs(sin.sin_port);
1960
1961 error = sock_listen(so, 32);
1962 if (error) {
1963 log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
1964 goto fail;
1965 }
1966
1967 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
1968 timeo.tv_usec = 0;
1969 timeo.tv_sec = 60;
1970 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
1971 if (error)
1972 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
1973 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1974 if (error)
1975 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
1976 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
1977 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
1978 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
1979 error = 0;
1980
1981 /* IPv6 */
1982 error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
1983 if (error) {
1984 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
1985 goto fail;
1986 }
1987 so6 = nfs4_cb_so6;
1988
1989 sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
1990 sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
1991 /* try to use specified port or same port as IPv4 */
1992 port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
1993 ipv6_bind_again:
1994 sin6.sin6_len = sizeof(struct sockaddr_in6);
1995 sin6.sin6_family = AF_INET6;
1996 sin6.sin6_addr = in6addr_any;
1997 sin6.sin6_port = htons(port);
1998 error = sock_bind(so6, (struct sockaddr *)&sin6);
1999 if (error) {
2000 if (port != nfs_callback_port) {
2001 /* if we simply tried to match the IPv4 port, then try any port */
2002 port = 0;
2003 goto ipv6_bind_again;
2004 }
2005 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
2006 goto fail;
2007 }
2008 error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
2009 if (error) {
2010 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
2011 goto fail;
2012 }
2013 nfs4_cb_port6 = ntohs(sin6.sin6_port);
2014
2015 error = sock_listen(so6, 32);
2016 if (error) {
2017 log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
2018 goto fail;
2019 }
2020
2021 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2022 timeo.tv_usec = 0;
2023 timeo.tv_sec = 60;
2024 error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2025 if (error)
2026 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
2027 error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2028 if (error)
2029 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
2030 sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2031 sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2032 sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2033 error = 0;
2034
2035 fail:
2036 if (error) {
2037 nfs4_cb_so = nfs4_cb_so6 = NULL;
2038 lck_mtx_unlock(nfs_global_mutex);
2039 if (so) {
2040 sock_shutdown(so, SHUT_RDWR);
2041 sock_close(so);
2042 }
2043 if (so6) {
2044 sock_shutdown(so6, SHUT_RDWR);
2045 sock_close(so6);
2046 }
2047 } else {
2048 lck_mtx_unlock(nfs_global_mutex);
2049 }
2050 }
2051
2052 /*
2053 * Shut down the callback channel for the NFS mount.
2054 *
2055 * Clears the mount's callback ID and releases the mounts
2056 * reference on the callback socket. Last reference dropped
2057 * will also shut down the callback socket(s).
2058 */
2059 void
2060 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
2061 {
2062 struct nfs_callback_socket *ncbsp;
2063 socket_t so, so6;
2064 struct nfs4_cb_sock_list cb_socks;
2065 struct timespec ts = {1,0};
2066
2067 lck_mtx_lock(nfs_global_mutex);
2068 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
2069 /* wait for any callbacks in progress to complete */
2070 while (nmp->nm_cbrefs)
2071 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
2072 nmp->nm_cbid = 0;
2073 if (--nfs4_cb_so_usecount) {
2074 lck_mtx_unlock(nfs_global_mutex);
2075 return;
2076 }
2077 so = nfs4_cb_so;
2078 so6 = nfs4_cb_so6;
2079 nfs4_cb_so = nfs4_cb_so6 = NULL;
2080 TAILQ_INIT(&cb_socks);
2081 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
2082 lck_mtx_unlock(nfs_global_mutex);
2083 if (so) {
2084 sock_shutdown(so, SHUT_RDWR);
2085 sock_close(so);
2086 }
2087 if (so6) {
2088 sock_shutdown(so6, SHUT_RDWR);
2089 sock_close(so6);
2090 }
2091 while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
2092 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
2093 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2094 sock_close(ncbsp->ncbs_so);
2095 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2096 FREE(ncbsp, M_TEMP);
2097 }
2098 }
2099
2100 /*
2101 * Check periodically for stale/unused nfs callback sockets
2102 */
2103 #define NFS4_CB_TIMER_PERIOD 30
2104 #define NFS4_CB_IDLE_MAX 300
2105 void
2106 nfs4_callback_timer(__unused void *param0, __unused void *param1)
2107 {
2108 struct nfs_callback_socket *ncbsp, *nextncbsp;
2109 struct timeval now;
2110
2111 loop:
2112 lck_mtx_lock(nfs_global_mutex);
2113 if (TAILQ_EMPTY(&nfs4_cb_socks)) {
2114 nfs4_callback_timer_on = 0;
2115 lck_mtx_unlock(nfs_global_mutex);
2116 return;
2117 }
2118 microuptime(&now);
2119 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
2120 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
2121 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
2122 continue;
2123 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
2124 lck_mtx_unlock(nfs_global_mutex);
2125 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
2126 sock_close(ncbsp->ncbs_so);
2127 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
2128 FREE(ncbsp, M_TEMP);
2129 goto loop;
2130 }
2131 nfs4_callback_timer_on = 1;
2132 nfs_interval_timer_start(nfs4_callback_timer_call,
2133 NFS4_CB_TIMER_PERIOD * 1000);
2134 lck_mtx_unlock(nfs_global_mutex);
2135 }
2136
2137 /*
2138 * Accept a new callback socket.
2139 */
2140 void
2141 nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
2142 {
2143 socket_t newso = NULL;
2144 struct nfs_callback_socket *ncbsp;
2145 struct nfsmount *nmp;
2146 struct timeval timeo, now;
2147 int error, on = 1, ip;
2148
2149 if (so == nfs4_cb_so)
2150 ip = 4;
2151 else if (so == nfs4_cb_so6)
2152 ip = 6;
2153 else
2154 return;
2155
2156 /* allocate/initialize a new nfs_callback_socket */
2157 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
2158 if (!ncbsp) {
2159 log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
2160 return;
2161 }
2162 bzero(ncbsp, sizeof(*ncbsp));
2163 ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
2164 nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);
2165
2166 /* accept a new socket */
2167 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
2168 ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
2169 nfs4_cb_rcv, ncbsp, &newso);
2170 if (error) {
2171 log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
2172 FREE(ncbsp, M_TEMP);
2173 return;
2174 }
2175
2176 /* set up the new socket */
2177 /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */
2178 timeo.tv_usec = 0;
2179 timeo.tv_sec = 60;
2180 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
2181 if (error)
2182 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
2183 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2184 if (error)
2185 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
2186 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2187 sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
2188 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
2189 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
2190
2191 ncbsp->ncbs_so = newso;
2192 microuptime(&now);
2193 ncbsp->ncbs_stamp = now.tv_sec;
2194
2195 lck_mtx_lock(nfs_global_mutex);
2196
2197 /* add it to the list */
2198 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
2199
2200 /* verify it's from a host we have mounted */
2201 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2202 /* check if socket's source address matches this mount's server address */
2203 if (!nmp->nm_saddr)
2204 continue;
2205 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2206 break;
2207 }
2208 if (!nmp) /* we don't want this socket, mark it dead */
2209 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2210
2211 /* make sure the callback socket cleanup timer is running */
2212 /* (shorten the timer if we've got a socket we don't want) */
2213 if (!nfs4_callback_timer_on) {
2214 nfs4_callback_timer_on = 1;
2215 nfs_interval_timer_start(nfs4_callback_timer_call,
2216 !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
2217 } else if (!nmp && (nfs4_callback_timer_on < 2)) {
2218 nfs4_callback_timer_on = 2;
2219 thread_call_cancel(nfs4_callback_timer_call);
2220 nfs_interval_timer_start(nfs4_callback_timer_call, 500);
2221 }
2222
2223 lck_mtx_unlock(nfs_global_mutex);
2224 }
2225
2226 /*
2227 * Receive mbufs from callback sockets into RPC records and process each record.
2228 * Detect connection has been closed and shut down.
2229 */
2230 void
2231 nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
2232 {
2233 struct nfs_callback_socket *ncbsp = arg;
2234 struct timespec ts = {1,0};
2235 struct timeval now;
2236 mbuf_t m;
2237 int error = 0, recv = 1;
2238
2239 lck_mtx_lock(nfs_global_mutex);
2240 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
2241 /* wait if upcall is already in progress */
2242 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
2243 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
2244 }
2245 ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
2246 lck_mtx_unlock(nfs_global_mutex);
2247
2248 /* loop while we make error-free progress */
2249 while (!error && recv) {
2250 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
2251 if (m) /* handle the request */
2252 error = nfs4_cb_handler(ncbsp, m);
2253 }
2254
2255 /* note: no error and no data indicates server closed its end */
2256 if ((error != EWOULDBLOCK) && (error || !recv)) {
2257 /*
2258 * Socket is either being closed or should be.
2259 * We can't close the socket in the context of the upcall.
2260 * So we mark it as dead and leave it for the cleanup timer to reap.
2261 */
2262 ncbsp->ncbs_stamp = 0;
2263 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
2264 } else {
2265 microuptime(&now);
2266 ncbsp->ncbs_stamp = now.tv_sec;
2267 }
2268
2269 lck_mtx_lock(nfs_global_mutex);
2270 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
2271 lck_mtx_unlock(nfs_global_mutex);
2272 wakeup(ncbsp);
2273 }
2274
2275 /*
2276 * Handle an NFS callback channel request.
2277 */
2278 int
2279 nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
2280 {
2281 socket_t so = ncbsp->ncbs_so;
2282 struct nfsm_chain nmreq, nmrep;
2283 mbuf_t mhead = NULL, mrest = NULL, m;
2284 struct msghdr msg;
2285 struct nfsmount *nmp;
2286 fhandle_t fh;
2287 nfsnode_t np;
2288 nfs_stateid stateid;
2289 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
2290 uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
2291 uint32_t auth_type, auth_len;
2292 uint32_t numres, *pnumres;
2293 int error = 0, replen, len;
2294 size_t sentlen = 0;
2295
2296 xid = numops = op = status = procnum = taglen = cbid = 0;
2297
2298 nfsm_chain_dissect_init(error, &nmreq, mreq);
2299 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID
2300 nfsm_chain_get_32(error, &nmreq, val); // RPC Call
2301 nfsm_assert(error, (val == RPC_CALL), EBADRPC);
2302 nfsm_chain_get_32(error, &nmreq, val); // RPC Version
2303 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
2304 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number
2305 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
2306 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number
2307 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
2308 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number
2309 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
2310
2311 /* Handle authentication */
2312 /* XXX just ignore auth for now - handling kerberos may be tricky */
2313 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor
2314 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length
2315 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2316 if (!error && (auth_len > 0))
2317 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2318 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
2319 nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length
2320 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
2321 if (!error && (auth_len > 0))
2322 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
2323 if (error) {
2324 status = error;
2325 error = 0;
2326 goto nfsmout;
2327 }
2328
2329 switch (procnum) {
2330 case NFSPROC4_CB_NULL:
2331 status = NFSERR_RETVOID;
2332 break;
2333 case NFSPROC4_CB_COMPOUND:
2334 /* tag, minorversion, cb ident, numops, op array */
2335 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */
2336 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC);
2337
2338 /* start building the body of the response */
2339 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
2340 nfsm_chain_init(&nmrep, mrest);
2341
2342 /* copy tag from request to response */
2343 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */
2344 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
2345 nfsm_chain_get_32(error, &nmreq, val);
2346 nfsm_chain_add_32(error, &nmrep, val);
2347 }
2348
2349 /* insert number of results placeholder */
2350 numres = 0;
2351 nfsm_chain_add_32(error, &nmrep, numres);
2352 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
2353
2354 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */
2355 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
2356 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */
2357 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */
2358 if (error) {
2359 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
2360 status = error;
2361 else if ((error == ENOBUFS) || (error == ENOMEM))
2362 status = NFSERR_RESOURCE;
2363 else
2364 status = NFSERR_SERVERFAULT;
2365 error = 0;
2366 nfsm_chain_null(&nmrep);
2367 goto nfsmout;
2368 }
2369 /* match the callback ID to a registered mount */
2370 lck_mtx_lock(nfs_global_mutex);
2371 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
2372 if (nmp->nm_cbid != cbid)
2373 continue;
2374 /* verify socket's source address matches this mount's server address */
2375 if (!nmp->nm_saddr)
2376 continue;
2377 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
2378 break;
2379 }
2380 /* mark the NFS mount as busy */
2381 if (nmp)
2382 nmp->nm_cbrefs++;
2383 lck_mtx_unlock(nfs_global_mutex);
2384 if (!nmp) {
2385 /* if no mount match, just drop socket. */
2386 error = EPERM;
2387 nfsm_chain_null(&nmrep);
2388 goto out;
2389 }
2390
2391 /* process ops, adding results to mrest */
2392 while (numops > 0) {
2393 numops--;
2394 nfsm_chain_get_32(error, &nmreq, op);
2395 if (error)
2396 break;
2397 switch (op) {
2398 case NFS_OP_CB_GETATTR:
2399 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
2400 np = NULL;
2401 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2402 bmlen = NFS_ATTR_BITMAP_LEN;
2403 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
2404 if (error) {
2405 status = error;
2406 error = 0;
2407 numops = 0; /* don't process any more ops */
2408 } else {
2409 /* find the node for the file handle */
2410 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2411 if (error || !np) {
2412 status = NFSERR_BADHANDLE;
2413 error = 0;
2414 np = NULL;
2415 numops = 0; /* don't process any more ops */
2416 }
2417 }
2418 nfsm_chain_add_32(error, &nmrep, op);
2419 nfsm_chain_add_32(error, &nmrep, status);
2420 if (!error && (status == EBADRPC))
2421 error = status;
2422 if (np) {
2423 /* only allow returning size, change, and mtime attrs */
2424 NFS_CLEAR_ATTRIBUTES(&rbitmap);
2425 attrbytes = 0;
2426 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
2427 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
2428 attrbytes += 2 * NFSX_UNSIGNED;
2429 }
2430 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
2431 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
2432 attrbytes += 2 * NFSX_UNSIGNED;
2433 }
2434 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2435 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
2436 attrbytes += 3 * NFSX_UNSIGNED;
2437 }
2438 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
2439 nfsm_chain_add_32(error, &nmrep, attrbytes);
2440 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
2441 nfsm_chain_add_64(error, &nmrep,
2442 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
2443 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
2444 nfsm_chain_add_64(error, &nmrep, np->n_size);
2445 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
2446 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
2447 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
2448 }
2449 nfs_node_unlock(np);
2450 vnode_put(NFSTOV(np));
2451 np = NULL;
2452 }
2453 /*
2454 * If we hit an error building the reply, we can't easily back up.
2455 * So we'll just update the status and hope the server ignores the
2456 * extra garbage.
2457 */
2458 break;
2459 case NFS_OP_CB_RECALL:
2460 // (STATEID, TRUNCATE, FH) -> (STATUS)
2461 np = NULL;
2462 nfsm_chain_get_stateid(error, &nmreq, &stateid);
2463 nfsm_chain_get_32(error, &nmreq, truncate);
2464 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
2465 if (error) {
2466 status = error;
2467 error = 0;
2468 numops = 0; /* don't process any more ops */
2469 } else {
2470 /* find the node for the file handle */
2471 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
2472 if (error || !np) {
2473 status = NFSERR_BADHANDLE;
2474 error = 0;
2475 np = NULL;
2476 numops = 0; /* don't process any more ops */
2477 } else if (!(np->n_openflags & N_DELEG_MASK) ||
2478 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
2479 /* delegation stateid state doesn't match */
2480 status = NFSERR_BAD_STATEID;
2481 numops = 0; /* don't process any more ops */
2482 }
2483 if (!status) /* add node to recall queue, and wake socket thread */
2484 nfs4_delegation_return_enqueue(np);
2485 if (np) {
2486 nfs_node_unlock(np);
2487 vnode_put(NFSTOV(np));
2488 }
2489 }
2490 nfsm_chain_add_32(error, &nmrep, op);
2491 nfsm_chain_add_32(error, &nmrep, status);
2492 if (!error && (status == EBADRPC))
2493 error = status;
2494 break;
2495 case NFS_OP_CB_ILLEGAL:
2496 default:
2497 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
2498 status = NFSERR_OP_ILLEGAL;
2499 nfsm_chain_add_32(error, &nmrep, status);
2500 numops = 0; /* don't process any more ops */
2501 break;
2502 }
2503 numres++;
2504 }
2505
2506 if (!status && error) {
2507 if (error == EBADRPC)
2508 status = error;
2509 else if ((error == ENOBUFS) || (error == ENOMEM))
2510 status = NFSERR_RESOURCE;
2511 else
2512 status = NFSERR_SERVERFAULT;
2513 error = 0;
2514 }
2515
2516 /* Now, set the numres field */
2517 *pnumres = txdr_unsigned(numres);
2518 nfsm_chain_build_done(error, &nmrep);
2519 nfsm_chain_null(&nmrep);
2520
2521 /* drop the callback reference on the mount */
2522 lck_mtx_lock(nfs_global_mutex);
2523 nmp->nm_cbrefs--;
2524 if (!nmp->nm_cbid)
2525 wakeup(&nmp->nm_cbrefs);
2526 lck_mtx_unlock(nfs_global_mutex);
2527 break;
2528 }
2529
2530 nfsmout:
2531 if (status == EBADRPC)
2532 OSAddAtomic(1, &nfsstats.rpcinvalid);
2533
2534 /* build reply header */
2535 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
2536 nfsm_chain_init(&nmrep, mhead);
2537 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
2538 nfsm_chain_add_32(error, &nmrep, xid);
2539 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2540 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
2541 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2542 if (status & NFSERR_AUTHERR) {
2543 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2544 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
2545 } else {
2546 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2547 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2548 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2549 }
2550 } else {
2551 /* reply status */
2552 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2553 /* XXX RPCAUTH_NULL verifier */
2554 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2555 nfsm_chain_add_32(error, &nmrep, 0);
2556 /* accepted status */
2557 switch (status) {
2558 case EPROGUNAVAIL:
2559 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2560 break;
2561 case EPROGMISMATCH:
2562 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2563 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2564 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
2565 break;
2566 case EPROCUNAVAIL:
2567 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2568 break;
2569 case EBADRPC:
2570 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2571 break;
2572 default:
2573 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2574 if (status != NFSERR_RETVOID)
2575 nfsm_chain_add_32(error, &nmrep, status);
2576 break;
2577 }
2578 }
2579 nfsm_chain_build_done(error, &nmrep);
2580 if (error) {
2581 nfsm_chain_null(&nmrep);
2582 goto out;
2583 }
2584 error = mbuf_setnext(nmrep.nmc_mcur, mrest);
2585 if (error) {
2586 printf("nfs cb: mbuf_setnext failed %d\n", error);
2587 goto out;
2588 }
2589 mrest = NULL;
2590 /* Calculate the size of the reply */
2591 replen = 0;
2592 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
2593 replen += mbuf_len(m);
2594 mbuf_pkthdr_setlen(mhead, replen);
2595 error = mbuf_pkthdr_setrcvif(mhead, NULL);
2596 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
2597 nfsm_chain_null(&nmrep);
2598
2599 /* send the reply */
2600 bzero(&msg, sizeof(msg));
2601 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
2602 mhead = NULL;
2603 if (!error && ((int)sentlen != replen))
2604 error = EWOULDBLOCK;
2605 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
2606 error = ETIMEDOUT;
2607 out:
2608 if (error)
2609 nfsm_chain_cleanup(&nmrep);
2610 if (mhead)
2611 mbuf_freem(mhead);
2612 if (mrest)
2613 mbuf_freem(mrest);
2614 if (mreq)
2615 mbuf_freem(mreq);
2616 return (error);
2617 }
2618

/*
 * Initialize an nfs_rpc_record_state structure.
 *
 * Zeroes the parser state and primes it to expect a complete 4-byte
 * TCP record marker as the next data read from the socket.
 */
void
nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
{
	bzero(nrrsp, sizeof(*nrrsp));
	/* nrrs_fragleft doubles as the marker read buffer */
	nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
}
2629
2630 /*
2631 * Clean up an nfs_rpc_record_state structure.
2632 */
2633 void
2634 nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
2635 {
2636 if (nrrsp->nrrs_m) {
2637 mbuf_freem(nrrsp->nrrs_m);
2638 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2639 }
2640 }
2641
2642 /*
2643 * Read the next (marked) RPC record from the socket.
2644 *
2645 * *recvp returns if any data was received.
2646 * *mp returns the next complete RPC record
2647 */
2648 int
2649 nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
2650 {
2651 struct iovec aio;
2652 struct msghdr msg;
2653 size_t rcvlen;
2654 int error = 0;
2655 mbuf_t m;
2656
2657 *recvp = 0;
2658 *mp = NULL;
2659
2660 /* read the TCP RPC record marker */
2661 while (!error && nrrsp->nrrs_markerleft) {
2662 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
2663 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
2664 aio.iov_len = nrrsp->nrrs_markerleft;
2665 bzero(&msg, sizeof(msg));
2666 msg.msg_iov = &aio;
2667 msg.msg_iovlen = 1;
2668 error = sock_receive(so, &msg, flags, &rcvlen);
2669 if (error || !rcvlen)
2670 break;
2671 *recvp = 1;
2672 nrrsp->nrrs_markerleft -= rcvlen;
2673 if (nrrsp->nrrs_markerleft)
2674 continue;
2675 /* record marker complete */
2676 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
2677 if (nrrsp->nrrs_fragleft & 0x80000000) {
2678 nrrsp->nrrs_lastfrag = 1;
2679 nrrsp->nrrs_fragleft &= ~0x80000000;
2680 }
2681 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
2682 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
2683 /* This is SERIOUS! We are out of sync with the sender. */
2684 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
2685 error = EFBIG;
2686 }
2687 }
2688
2689 /* read the TCP RPC record fragment */
2690 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
2691 m = NULL;
2692 rcvlen = nrrsp->nrrs_fragleft;
2693 error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
2694 if (error || !rcvlen || !m)
2695 break;
2696 *recvp = 1;
2697 /* append mbufs to list */
2698 nrrsp->nrrs_fragleft -= rcvlen;
2699 if (!nrrsp->nrrs_m) {
2700 nrrsp->nrrs_m = m;
2701 } else {
2702 error = mbuf_setnext(nrrsp->nrrs_mlast, m);
2703 if (error) {
2704 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
2705 mbuf_freem(m);
2706 break;
2707 }
2708 }
2709 while (mbuf_next(m))
2710 m = mbuf_next(m);
2711 nrrsp->nrrs_mlast = m;
2712 }
2713
2714 /* done reading fragment? */
2715 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
2716 /* reset socket fragment parsing state */
2717 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
2718 if (nrrsp->nrrs_lastfrag) {
2719 /* RPC record complete */
2720 *mp = nrrsp->nrrs_m;
2721 /* reset socket record parsing state */
2722 nrrsp->nrrs_reclen = 0;
2723 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
2724 nrrsp->nrrs_lastfrag = 0;
2725 }
2726 }
2727
2728 return (error);
2729 }
2730
2731
2732
2733 /*
2734 * The NFS client send routine.
2735 *
2736 * Send the given NFS request out the mount's socket.
2737 * Holds nfs_sndlock() for the duration of this call.
2738 *
2739 * - check for request termination (sigintr)
2740 * - wait for reconnect, if necessary
2741 * - UDP: check the congestion window
2742 * - make a copy of the request to send
2743 * - UDP: update the congestion window
2744 * - send the request
2745 *
2746 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
2747 * rexmit count is also updated if this isn't the first send.
2748 *
2749 * If the send is not successful, make sure R_MUSTRESEND is set.
2750 * If this wasn't the first transmit, set R_RESENDERR.
2751 * Also, undo any UDP congestion window changes made.
2752 *
2753 * If the error appears to indicate that the socket should
2754 * be reconnected, mark the socket for reconnection.
2755 *
2756 * Only return errors when the request should be aborted.
2757 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct nfs_socket *nso;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	/* serialize senders on this mount; on failure record the error in the request */
	error = nfs_sndlock(req);
	if (error) {
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* check for request termination (signal / forced unmount) before sending */
	error = nfs_sigintr(req->r_nmp, req, NULL, 0);
	if (error) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	/*
	 * If it's a setup RPC but we're not in SETUP... must need reconnect.
	 * If it's a recovery RPC but the socket's not ready... must need reconnect.
	 */
	if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
	    ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
		error = ETIMEDOUT;
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			/* async caller: flag for resend and report success */
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) {
				struct timeval now;
				microuptime(&now);
				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
					/* soft mount in reconnect for a while... terminate ASAP */
					OSAddAtomic(1, &nfsstats.rpctimeouts);
					req->r_flags |= R_SOFTTERM;
					req->r_error = error = ETIMEDOUT;
					break;
				}
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error) {
			lck_mtx_lock(&req->r_mtx);
			req->r_error = error;
			req->r_flags &= ~R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			return (error);
		}
		/* socket should be ready now; retake the send lock and re-validate */
		goto again;
	}
	nso = nmp->nm_nso;
	/* note that we're using the mount's socket to do the send */
	nmp->nm_state |= NFSSTA_SENDING;  /* will be cleared by nfs_sndunlock() */
	lck_mtx_unlock(&nmp->nm_lock);
	if (!nso) {
		/* no socket right now; flag for resend and report success */
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	/* R_SENT set means this is a retransmit, not the first send */
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				/* remove ourselves from the cwnd queue if still on it */
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	/* send a copy so the original mbuf chain stays intact for any retransmit */
	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
	    wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	/* unconnected datagram sockets need an explicit destination address */
	if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
		msg.msg_name = (caddr_t)sendnam;
		msg.msg_namelen = sendnam->sa_len;
	}
	error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
		    req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	/* a partial send on a stream socket corrupts the record stream: must reconnect */
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_flags &= ~R_SENDING;
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, &nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			/* someone is waiting for this request to go out */
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			/* read (and thereby clear) any pending socket-level error */
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
				    error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
		/* FALLTHROUGH: TCP send timeout warrants a reconnect */
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
		    !req->r_nmp ? "<unmounted>" :
		    vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}
3066
3067 /*
3068 * NFS client socket upcalls
3069 *
3070 * Pull RPC replies out of an NFS mount's socket and match them
3071 * up with the pending request.
3072 *
3073 * The datagram code is simple because we always get whole
3074 * messages out of the socket.
3075 *
3076 * The stream code is more involved because we have to parse
3077 * the RPC records out of the stream.
3078 */
3079
3080 /* NFS client UDP socket upcall */
3081 void
3082 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
3083 {
3084 struct nfsmount *nmp = arg;
3085 struct nfs_socket *nso = nmp->nm_nso;
3086 size_t rcvlen;
3087 mbuf_t m;
3088 int error = 0;
3089
3090 if (nmp->nm_sockflags & NMSOCK_CONNECTING)
3091 return;
3092
3093 do {
3094 /* make sure we're on the current socket */
3095 if (!nso || (nso->nso_so != so))
3096 return;
3097
3098 m = NULL;
3099 rcvlen = 1000000;
3100 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
3101 if (m)
3102 nfs_request_match_reply(nmp, m);
3103 } while (m && !error);
3104
3105 if (error && (error != EWOULDBLOCK)) {
3106 /* problems with the socket... mark for reconnection */
3107 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
3108 nfs_need_reconnect(nmp);
3109 }
3110 }
3111
/* NFS client TCP socket upcall */
void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct nfs_socket *nso = nmp->nm_nso;
	struct nfs_rpc_record_state nrrs;
	mbuf_t m;
	int error = 0;
	int recv = 1;

	/* a connect is in progress; leave the socket alone */
	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
		return;

	/* make sure we're on the current socket */
	lck_mtx_lock(&nmp->nm_lock);
	nso = nmp->nm_nso;
	if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);

	/* make sure this upcall should be trying to do work */
	lck_mtx_lock(&nso->nso_lock);
	if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	/* claim the upcall (NSO_UPCALL keeps other upcalls out) and take a
	 * private copy of the record-parsing state while holding nso_lock */
	nso->nso_flags |= NSO_UPCALL;
	nrrs = nso->nso_rrs;
	lck_mtx_unlock(&nso->nso_lock);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
		if (m) /* match completed response with request */
			nfs_request_match_reply(nmp, m);
	}

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_nso == nso) {
		/* still the same socket, so update socket's RPC parsing state */
		lck_mtx_unlock(&nmp->nm_lock);
		lck_mtx_lock(&nso->nso_lock);
		nso->nso_rrs = nrrs;
		nso->nso_flags &= ~NSO_UPCALL;
		lck_mtx_unlock(&nso->nso_lock);
		/* a disconnect may be waiting for this upcall to finish */
		if (nmp->nm_sockflags & NMSOCK_DISCONNECTING)
			wakeup(&nmp->nm_sockflags);
	} else {
		/* socket was replaced while we were working; discard our state copy */
		lck_mtx_unlock(&nmp->nm_lock);
	}
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}
3176
3177 /*
3178 * "poke" a socket to try to provoke any pending errors
3179 */
3180 void
3181 nfs_sock_poke(struct nfsmount *nmp)
3182 {
3183 struct iovec aio;
3184 struct msghdr msg;
3185 size_t len;
3186 int error = 0;
3187 int dummy;
3188
3189 lck_mtx_lock(&nmp->nm_lock);
3190 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
3191 !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
3192 lck_mtx_unlock(&nmp->nm_lock);
3193 return;
3194 }
3195 lck_mtx_unlock(&nmp->nm_lock);
3196 aio.iov_base = &dummy;
3197 aio.iov_len = 0;
3198 len = 0;
3199 bzero(&msg, sizeof(msg));
3200 msg.msg_iov = &aio;
3201 msg.msg_iovlen = 1;
3202 error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
3203 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
3204 }
3205
/*
 * Match an RPC reply with the corresponding request
 */
void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_int32_t reply = 0, rxid = 0;
	int error = 0, asyncioq, t1;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		/* not a well-formed reply: count it and drop the mbufs */
		OSAddAtomic(1, &nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		/* cheap check first: already answered, or XID mismatch */
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
				     (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				/* this request no longer counts against the window */
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so course, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if (nfs_request_using_gss(req))
			nfs_gss_clnt_rpcdone(req);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, &nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}
3314
/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
int
nfs_wait_reply(struct nfsreq *req)
{
	struct timespec ts = { 2, 0 };
	int error = 0, slpflag, first = 1;

	/* sleep interruptibly only for INTR mounts on non-NOINTR requests */
	if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		/* first pass checks without the thread so an early signal doesn't abort */
		if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
			    req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			req->r_flags |= R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			if (nfs_request_using_gss(req)) {
				/*
				 * It's an RPCSEC_GSS request.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				lck_mtx_lock(&req->r_mtx);
				req->r_flags &= ~R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
			    req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			/* the reply may have arrived while we were sending */
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		first = slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}
3373
3374 /*
3375 * An NFS request goes something like this:
3376 * (nb: always frees up mreq mbuf list)
3377 * nfs_request_create()
3378 * - allocates a request struct if one is not provided
3379 * - initial fill-in of the request struct
3380 * nfs_request_add_header()
3381 * - add the RPC header
3382 * nfs_request_send()
3383 * - link it into list
3384 * - call nfs_send() for first transmit
3385 * nfs_request_wait()
3386 * - call nfs_wait_reply() to wait for the reply
3387 * nfs_request_finish()
3388 * - break down rpc header and return with error or nfs reply
3389 * pointed to by nmrep.
3390 * nfs_request_rele()
3391 * nfs_request_destroy()
3392 * - clean up the request struct
3393 * - free the request struct if it was allocated by nfs_request_create()
3394 */
3395
/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			/* this routine always consumes the request mbufs, even on failure */
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	/* zero the whole struct, then remember whether we allocated it */
	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	/* a force-unmounted, timed-out mount can't take new requests */
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	/* per-procedure stats only apply to v2/v3 procedure numbers */
	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, &nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (!thd)
		req->r_flags |= R_NOINTR;	/* no thread context => can't be interrupted */
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;	/* this procedure participates in RTT timing */
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	/* mark the request as not on any of the async/resend/cwnd queues */
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	/* set auth flavor to use for request */
	if (!req->r_cred)
		req->r_auth = RPCAUTH_NONE;
	else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
		req->r_auth = req->r_np->n_auth;
	else
		req->r_auth = nmp->nm_auth;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}
3489
/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };
	int clearjbtimeo = 0;

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		/* re-check under the lock before removing */
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	lck_mtx_lock(&req->r_mtx);
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_flags & R_CWND) {
			/* Decrement the outstanding request count. */
			req->r_flags &= ~R_CWND;
			nmp->nm_sent -= NFS_CWNDSCALE;
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/* pull the request off the resend queue, if it's on it */
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
		}
		/* pull the request off the cwnd wait queue, if it's on it */
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		if (req->r_flags & R_JBTPRINTFMSG) {
			/* drop this request's jukebox-message count on the mount */
			req->r_flags &= ~R_JBTPRINTFMSG;
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	/* if R_RESENDQ is still set, the socket thread has the request in hand;
	 * wait for it to let go before tearing the request down */
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (clearjbtimeo)
		nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	/* r_mhead includes r_mrest, so free whichever head we have (not both) */
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (nfs_request_using_gss(req))
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);
	if (req->r_wrongsec)
		FREE(req->r_wrongsec, M_TEMP);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	/* only free the struct if nfs_request_create() allocated it */
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
3573
3574 void
3575 nfs_request_ref(struct nfsreq *req, int locked)
3576 {
3577 if (!locked)
3578 lck_mtx_lock(&req->r_mtx);
3579 if (req->r_refs <= 0)
3580 panic("nfsreq reference error");
3581 req->r_refs++;
3582 if (!locked)
3583 lck_mtx_unlock(&req->r_mtx);
3584 }
3585
3586 void
3587 nfs_request_rele(struct nfsreq *req)
3588 {
3589 int destroy;
3590
3591 lck_mtx_lock(&req->r_mtx);
3592 if (req->r_refs <= 0)
3593 panic("nfsreq reference underflow");
3594 req->r_refs--;
3595 destroy = (req->r_refs == 0);
3596 lck_mtx_unlock(&req->r_mtx);
3597 if (destroy)
3598 nfs_request_destroy(req);
3599 }
3600
3601
3602 /*
3603 * Add an (updated) RPC header with authorization to an NFS request.
3604 */
3605 int
3606 nfs_request_add_header(struct nfsreq *req)
3607 {
3608 struct nfsmount *nmp;
3609 int error = 0;
3610 mbuf_t m;
3611
3612 /* free up any previous header */
3613 if ((m = req->r_mhead)) {
3614 while (m && (m != req->r_mrest))
3615 m = mbuf_free(m);
3616 req->r_mhead = NULL;
3617 }
3618
3619 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3620 if (!nmp)
3621 return (ENXIO);
3622
3623 error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
3624 if (error)
3625 return (error);
3626
3627 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
3628 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3629 if (!nmp)
3630 return (ENXIO);
3631 lck_mtx_lock(&nmp->nm_lock);
3632 if (NMFLAG(nmp, SOFT))
3633 req->r_retry = nmp->nm_retry;
3634 else
3635 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
3636 lck_mtx_unlock(&nmp->nm_lock);
3637
3638 return (error);
3639 }
3640
3641
3642 /*
3643 * Queue an NFS request up and send it out.
3644 */
3645 int
3646 nfs_request_send(struct nfsreq *req, int wait)
3647 {
3648 struct nfsmount *nmp;
3649 struct timeval now;
3650
3651 lck_mtx_lock(&req->r_mtx);
3652 req->r_flags |= R_SENDING;
3653 lck_mtx_unlock(&req->r_mtx);
3654
3655 lck_mtx_lock(nfs_request_mutex);
3656
3657 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3658 if (!nmp) {
3659 lck_mtx_unlock(nfs_request_mutex);
3660 return (ENXIO);
3661 }
3662
3663 microuptime(&now);
3664 if (!req->r_start) {
3665 req->r_start = now.tv_sec;
3666 req->r_lastmsg = now.tv_sec -
3667 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
3668 }
3669
3670 OSAddAtomic(1, &nfsstats.rpcrequests);
3671
3672 /*
3673 * Chain request into list of outstanding requests. Be sure
3674 * to put it LAST so timer finds oldest requests first.
3675 * Make sure that the request queue timer is running
3676 * to check for possible request timeout.
3677 */
3678 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
3679 req->r_lflags |= RL_QUEUED;
3680 if (!nfs_request_timer_on) {
3681 nfs_request_timer_on = 1;
3682 nfs_interval_timer_start(nfs_request_timer_call,
3683 NFS_REQUESTDELAY);
3684 }
3685 lck_mtx_unlock(nfs_request_mutex);
3686
3687 /* Send the request... */
3688 return (nfs_send(req, wait));
3689 }
3690
3691 /*
3692 * Call nfs_wait_reply() to wait for the reply.
3693 */
3694 void
3695 nfs_request_wait(struct nfsreq *req)
3696 {
3697 req->r_error = nfs_wait_reply(req);
3698 }
3699
3700 /*
3701 * Finish up an NFS request by dequeueing it and
3702 * doing the initial NFS request reply processing.
3703 */
3704 int
3705 nfs_request_finish(
3706 struct nfsreq *req,
3707 struct nfsm_chain *nmrepp,
3708 int *status)
3709 {
3710 struct nfsmount *nmp;
3711 mbuf_t mrep;
3712 int verf_type = 0;
3713 uint32_t verf_len = 0;
3714 uint32_t reply_status = 0;
3715 uint32_t rejected_status = 0;
3716 uint32_t auth_status = 0;
3717 uint32_t accepted_status = 0;
3718 struct nfsm_chain nmrep;
3719 int error, clearjbtimeo;
3720
3721 error = req->r_error;
3722
3723 if (nmrepp)
3724 nmrepp->nmc_mhead = NULL;
3725
3726 /* RPC done, unlink the request. */
3727 nfs_reqdequeue(req);
3728
3729 mrep = req->r_nmrep.nmc_mhead;
3730
3731 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
3732
3733 if ((req->r_flags & R_CWND) && nmp) {
3734 /*
3735 * Decrement the outstanding request count.
3736 */
3737 req->r_flags &= ~R_CWND;
3738 lck_mtx_lock(&nmp->nm_lock);
3739 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3740 nmp->nm_sent -= NFS_CWNDSCALE;
3741 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3742 /* congestion window is open, poke the cwnd queue */
3743 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3744 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3745 req2->r_cchain.tqe_next = NFSREQNOLIST;
3746 wakeup(req2);
3747 }
3748 lck_mtx_unlock(&nmp->nm_lock);
3749 }
3750
3751 if (nfs_request_using_gss(req)) {
3752 /*
3753 * If the request used an RPCSEC_GSS credential
3754 * then reset its sequence number bit in the
3755 * request window.
3756 */
3757 nfs_gss_clnt_rpcdone(req);
3758
3759 /*
3760 * If we need to re-send, go back and re-build the
3761 * request based on a new sequence number.
3762 * Note that we're using the original XID.
3763 */
3764 if (error == EAGAIN) {
3765 req->r_error = 0;
3766 if (mrep)
3767 mbuf_freem(mrep);
3768 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
3769 req->r_nmrep.nmc_mhead = NULL;
3770 req->r_flags |= R_RESTART;
3771 if (error == ENEEDAUTH) {
3772 req->r_xid = 0; // get a new XID
3773 error = 0;
3774 }
3775 goto nfsmout;
3776 }
3777 }
3778
3779 /*
3780 * If there was a successful reply, make sure to mark the mount as up.
3781 * If a tprintf message was given (or if this is a timed-out soft mount)
3782 * then post a tprintf message indicating the server is alive again.
3783 */
3784 if (!error) {
3785 if ((req->r_flags & R_TPRINTFMSG) ||
3786 (nmp && NMFLAG(nmp, SOFT) &&
3787 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
3788 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
3789 else
3790 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
3791 }
3792 if (!error && !nmp)
3793 error = ENXIO;
3794 nfsmout_if(error);
3795
3796 /*
3797 * break down the RPC header and check if ok
3798 */
3799 nmrep = req->r_nmrep;
3800 nfsm_chain_get_32(error, &nmrep, reply_status);
3801 nfsmout_if(error);
3802 if (reply_status == RPC_MSGDENIED) {
3803 nfsm_chain_get_32(error, &nmrep, rejected_status);
3804 nfsmout_if(error);
3805 if (rejected_status == RPC_MISMATCH) {
3806 error = ENOTSUP;
3807 goto nfsmout;
3808 }
3809 nfsm_chain_get_32(error, &nmrep, auth_status);
3810 nfsmout_if(error);
3811 switch (auth_status) {
3812 case RPCSEC_GSS_CREDPROBLEM:
3813 case RPCSEC_GSS_CTXPROBLEM:
3814 /*
3815 * An RPCSEC_GSS cred or context problem.
3816 * We can't use it anymore.
3817 * Restore the args, renew the context
3818 * and set up for a resend.
3819 */
3820 error = nfs_gss_clnt_args_restore(req);
3821 if (error && error != ENEEDAUTH)
3822 break;
3823
3824 if (!error) {
3825 error = nfs_gss_clnt_ctx_renew(req);
3826 if (error)
3827 break;
3828 }
3829 mbuf_freem(mrep);
3830 req->r_nmrep.nmc_mhead = NULL;
3831 req->r_xid = 0; // get a new XID
3832 req->r_flags |= R_RESTART;
3833 goto nfsmout;
3834 default:
3835 error = EACCES;
3836 break;
3837 }
3838 goto nfsmout;
3839 }
3840
3841 /* Now check the verifier */
3842 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
3843 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
3844 nfsmout_if(error);
3845
3846 switch (req->r_auth) {
3847 case RPCAUTH_NONE:
3848 case RPCAUTH_SYS:
3849 /* Any AUTH_SYS verifier is ignored */
3850 if (verf_len > 0)
3851 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
3852 nfsm_chain_get_32(error, &nmrep, accepted_status);
3853 break;
3854 case RPCAUTH_KRB5:
3855 case RPCAUTH_KRB5I:
3856 case RPCAUTH_KRB5P:
3857 error = nfs_gss_clnt_verf_get(req, &nmrep,
3858 verf_type, verf_len, &accepted_status);
3859 break;
3860 }
3861 nfsmout_if(error);
3862
3863 switch (accepted_status) {
3864 case RPC_SUCCESS:
3865 if (req->r_procnum == NFSPROC_NULL) {
3866 /*
3867 * The NFS null procedure is unique,
3868 * in not returning an NFS status.
3869 */
3870 *status = NFS_OK;
3871 } else {
3872 nfsm_chain_get_32(error, &nmrep, *status);
3873 nfsmout_if(error);
3874 }
3875
3876 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
3877 /*
3878 * It's a JUKEBOX error - delay and try again
3879 */
3880 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
3881
3882 mbuf_freem(mrep);
3883 req->r_nmrep.nmc_mhead = NULL;
3884 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
3885 /* we're not yet completely mounted and */
3886 /* we can't complete an RPC, so we fail */
3887 OSAddAtomic(1, &nfsstats.rpctimeouts);
3888 nfs_softterm(req);
3889 error = req->r_error;
3890 goto nfsmout;
3891 }
3892 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
3893 if (req->r_delay > 30)
3894 req->r_delay = 30;
3895 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
3896 if (!(req->r_flags & R_JBTPRINTFMSG)) {
3897 req->r_flags |= R_JBTPRINTFMSG;
3898 lck_mtx_lock(&nmp->nm_lock);
3899 nmp->nm_jbreqs++;
3900 lck_mtx_unlock(&nmp->nm_lock);
3901 }
3902 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
3903 "resource temporarily unavailable (jukebox)");
3904 }
3905 if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) {
3906 /* for soft mounts, just give up after a short while */
3907 OSAddAtomic(1, &nfsstats.rpctimeouts);
3908 nfs_softterm(req);
3909 error = req->r_error;
3910 goto nfsmout;
3911 }
3912 delay = req->r_delay;
3913 if (req->r_callback.rcb_func) {
3914 struct timeval now;
3915 microuptime(&now);
3916 req->r_resendtime = now.tv_sec + delay;
3917 } else {
3918 do {
3919 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
3920 goto nfsmout;
3921 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
3922 slpflag = 0;
3923 } while (--delay > 0);
3924 }
3925 req->r_xid = 0; // get a new XID
3926 req->r_flags |= R_RESTART;
3927 req->r_start = 0;
3928 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
3929 return (0);
3930 }
3931
3932 if (req->r_flags & R_JBTPRINTFMSG) {
3933 req->r_flags &= ~R_JBTPRINTFMSG;
3934 lck_mtx_lock(&nmp->nm_lock);
3935 nmp->nm_jbreqs--;
3936 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
3937 lck_mtx_unlock(&nmp->nm_lock);
3938 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
3939 }
3940
3941 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
3942 /*
3943 * Hmmm... we need to try a different security flavor.
3944 * The first time a request hits this, we will allocate an array
3945 * to track flavors to try. We fill the array with the mount's
3946 * preferred flavors or the server's preferred flavors or just the
3947 * flavors we support.
3948 */
3949 uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
3950 int srvcount, i, j;
3951
3952 /* Call SECINFO to try to get list of flavors from server. */
3953 srvcount = NX_MAX_SEC_FLAVORS;
3954 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);
3955
3956 if (!req->r_wrongsec) {
3957 /* first time... set up flavor array */
3958 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK);
3959 if (!req->r_wrongsec) {
3960 error = EACCES;
3961 goto nfsmout;
3962 }
3963 i=0;
3964 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
3965 for(; i < nmp->nm_sec.count; i++)
3966 req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
3967 } else if (srvcount) { /* otherwise use the server's list of flavors */
3968 for(; i < srvcount; i++)
3969 req->r_wrongsec[i] = srvflavors[i];
3970 } else { /* otherwise, just try the flavors we support. */
3971 req->r_wrongsec[i++] = RPCAUTH_KRB5P;
3972 req->r_wrongsec[i++] = RPCAUTH_KRB5I;
3973 req->r_wrongsec[i++] = RPCAUTH_KRB5;
3974 req->r_wrongsec[i++] = RPCAUTH_SYS;
3975 req->r_wrongsec[i++] = RPCAUTH_NONE;
3976 }
3977 for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */
3978 req->r_wrongsec[i] = RPCAUTH_INVALID;
3979 }
3980
3981 /* clear the current flavor from the list */
3982 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
3983 if (req->r_wrongsec[i] == req->r_auth)
3984 req->r_wrongsec[i] = RPCAUTH_INVALID;
3985
3986 /* find the next flavor to try */
3987 for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
3988 if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
3989 if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) ||
3990 (req->r_wrongsec[i] == RPCAUTH_KRB5I) ||
3991 (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx &&
3992 (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) {
3993 /* don't bother trying Kerberos if we've already got a fallback context */
3994 req->r_wrongsec[i] = RPCAUTH_INVALID;
3995 continue;
3996 }
3997 if (!srvcount) /* no server list, just try it */
3998 break;
3999 /* check that it's in the server's list */
4000 for(j=0; j < srvcount; j++)
4001 if (req->r_wrongsec[i] == srvflavors[j])
4002 break;
4003 if (j < srvcount) /* found */
4004 break;
4005 /* not found in server list */
4006 req->r_wrongsec[i] = RPCAUTH_INVALID;
4007 }
4008 if (i == NX_MAX_SEC_FLAVORS) {
4009 /* nothing left to try! */
4010 error = EACCES;
4011 goto nfsmout;
4012 }
4013
4014 /* retry with the next auth flavor */
4015 req->r_auth = req->r_wrongsec[i];
4016 req->r_xid = 0; // get a new XID
4017 req->r_flags |= R_RESTART;
4018 req->r_start = 0;
4019 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
4020 return (0);
4021 }
4022 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
4023 /*
4024 * We renegotiated security for this request; so update the
4025 * default security flavor for the associated node.
4026 */
4027 if (req->r_np)
4028 req->r_np->n_auth = req->r_auth;
4029 }
4030
4031 if (*status == NFS_OK) {
4032 /*
4033 * Successful NFS request
4034 */
4035 *nmrepp = nmrep;
4036 req->r_nmrep.nmc_mhead = NULL;
4037 break;
4038 }
4039 /* Got an NFS error of some kind */
4040
4041 /*
4042 * If the File Handle was stale, invalidate the
4043 * lookup cache, just in case.
4044 */
4045 if ((*status == ESTALE) && req->r_np) {
4046 cache_purge(NFSTOV(req->r_np));
4047 /* if monitored, also send delete event */
4048 if (vnode_ismonitored(NFSTOV(req->r_np)))
4049 nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
4050 }
4051 if (nmp->nm_vers == NFS_VER2)
4052 mbuf_freem(mrep);
4053 else
4054 *nmrepp = nmrep;
4055 req->r_nmrep.nmc_mhead = NULL;
4056 error = 0;
4057 break;
4058 case RPC_PROGUNAVAIL:
4059 error = EPROGUNAVAIL;
4060 break;
4061 case RPC_PROGMISMATCH:
4062 error = ERPCMISMATCH;
4063 break;
4064 case RPC_PROCUNAVAIL:
4065 error = EPROCUNAVAIL;
4066 break;
4067 case RPC_GARBAGE:
4068 error = EBADRPC;
4069 break;
4070 case RPC_SYSTEM_ERR:
4071 default:
4072 error = EIO;
4073 break;
4074 }
4075 nfsmout:
4076 if (req->r_flags & R_JBTPRINTFMSG) {
4077 req->r_flags &= ~R_JBTPRINTFMSG;
4078 lck_mtx_lock(&nmp->nm_lock);
4079 nmp->nm_jbreqs--;
4080 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
4081 lck_mtx_unlock(&nmp->nm_lock);
4082 if (clearjbtimeo)
4083 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
4084 }
4085 FSDBG(273, R_XID32(req->r_xid), nmp, req,
4086 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
4087 return (error);
4088 }
4089
4090 /*
4091 * NFS request using a GSS/Kerberos security flavor?
4092 */
4093 int
4094 nfs_request_using_gss(struct nfsreq *req)
4095 {
4096 if (!req->r_gss_ctx)
4097 return (0);
4098 switch (req->r_auth) {
4099 case RPCAUTH_KRB5:
4100 case RPCAUTH_KRB5I:
4101 case RPCAUTH_KRB5P:
4102 return (1);
4103 }
4104 return (0);
4105 }
4106
4107 /*
4108 * Perform an NFS request synchronously.
4109 */
4110
4111 int
4112 nfs_request(
4113 nfsnode_t np,
4114 mount_t mp, /* used only if !np */
4115 struct nfsm_chain *nmrest,
4116 int procnum,
4117 vfs_context_t ctx,
4118 struct nfsreq_secinfo_args *si,
4119 struct nfsm_chain *nmrepp,
4120 u_int64_t *xidp,
4121 int *status)
4122 {
4123 return nfs_request2(np, mp, nmrest, procnum,
4124 vfs_context_thread(ctx), vfs_context_ucred(ctx),
4125 si, 0, nmrepp, xidp, status);
4126 }
4127
4128 int
4129 nfs_request2(
4130 nfsnode_t np,
4131 mount_t mp, /* used only if !np */
4132 struct nfsm_chain *nmrest,
4133 int procnum,
4134 thread_t thd,
4135 kauth_cred_t cred,
4136 struct nfsreq_secinfo_args *si,
4137 int flags,
4138 struct nfsm_chain *nmrepp,
4139 u_int64_t *xidp,
4140 int *status)
4141 {
4142 struct nfsreq rq, *req = &rq;
4143 int error;
4144
4145 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
4146 return (error);
4147 req->r_flags |= (flags & R_OPTMASK);
4148 if (si)
4149 req->r_secinfo = *si;
4150
4151 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
4152 do {
4153 req->r_error = 0;
4154 req->r_flags &= ~R_RESTART;
4155 if ((error = nfs_request_add_header(req)))
4156 break;
4157 if (xidp)
4158 *xidp = req->r_xid;
4159 if ((error = nfs_request_send(req, 1)))
4160 break;
4161 nfs_request_wait(req);
4162 if ((error = nfs_request_finish(req, nmrepp, status)))
4163 break;
4164 } while (req->r_flags & R_RESTART);
4165
4166 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
4167 nfs_request_rele(req);
4168 return (error);
4169 }
4170
4171
4172 /*
4173 * Set up a new null proc request to exchange GSS context tokens with the
4174 * server. Associate the context that we are setting up with the request that we
4175 * are sending.
4176 */
4177
4178 int
4179 nfs_request_gss(
4180 mount_t mp,
4181 struct nfsm_chain *nmrest,
4182 thread_t thd,
4183 kauth_cred_t cred,
4184 int flags,
4185 struct nfs_gss_clnt_ctx *cp, /* Set to gss context to renew or setup */
4186 struct nfsm_chain *nmrepp,
4187 int *status)
4188 {
4189 struct nfsreq rq, *req = &rq;
4190 int error;
4191
4192 if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
4193 return (error);
4194 req->r_flags |= (flags & R_OPTMASK);
4195
4196 if (cp == NULL) {
4197 printf("nfs_request_gss request has no context\n");
4198 nfs_request_rele(req);
4199 return (NFSERR_EAUTH);
4200 }
4201 nfs_gss_clnt_ctx_ref(req, cp);
4202
4203 FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
4204 do {
4205 req->r_error = 0;
4206 req->r_flags &= ~R_RESTART;
4207 if ((error = nfs_request_add_header(req)))
4208 break;
4209
4210 if ((error = nfs_request_send(req, 1)))
4211 break;
4212 nfs_request_wait(req);
4213 if ((error = nfs_request_finish(req, nmrepp, status)))
4214 break;
4215 } while (req->r_flags & R_RESTART);
4216
4217 FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
4218 nfs_request_rele(req);
4219 return (error);
4220 }
4221
/*
 * Create and start an asynchronous NFS request.
 *
 * The request is created, its RPC header is built, and it is handed to
 * the send path.  If a completion callback (cb) is supplied, an extra
 * reference is taken for the callback path and this function waits
 * until the request has actually been sent (R_SENT) before returning.
 * On success, *reqp holds the new request; the caller later completes
 * it with nfs_request_async_finish() or drops it with
 * nfs_request_async_cancel().
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,			/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsreq_cbinfo *cb,	/* optional completion callback */
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	req->r_flags |= R_ASYNC;
	if (si)
		req->r_secinfo = *si;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		/* callback-driven requests get an extra reference for the callback */
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) {
					lck_mtx_lock(&nmp->nm_lock);
					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
						/*
						 * It's not going to get off the resend queue if we're in recovery.
						 * So, just take it off ourselves.  We could be holding mount state
						 * busy and thus holding up the start of recovery.
						 */
						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
						req->r_rchain.tqe_next = NFSREQNOLIST;
						if (req->r_flags & R_RESENDQ)
							req->r_flags &= ~R_RESENDQ;
						lck_mtx_unlock(&nmp->nm_lock);
						req->r_flags |= R_SENDING;
						/* drop r_mtx across the actual send, then re-check */
						lck_mtx_unlock(&req->r_mtx);
						error = nfs_send(req, 1);
						lck_mtx_lock(&req->r_mtx);
						if (error)
							break;
						continue;
					}
					lck_mtx_unlock(&nmp->nm_lock);
				}
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		/*
		 * If the send failed before the request got out, drop the
		 * callback reference taken above — the callback will never run.
		 */
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	/*
	 * Drop the setup reference on failure, or for callback-style requests
	 * (the extra reference taken above keeps those alive for the callback).
	 */
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
4304
/*
 * Wait for and finish an asynchronous NFS request.
 *
 * First waits for the request to come off the resend queue (taking it
 * off ourselves if mount recovery would otherwise leave it stuck),
 * then waits for the reply and processes it.  If the request must be
 * restarted: for callback-driven (async I/O) requests it is requeued
 * and EINPROGRESS is returned; otherwise the restart is performed
 * synchronously here.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
	struct nfsmount *nmp;

	lck_mtx_lock(&req->r_mtx);
	/* mark that a thread is now waiting on this request (non-callback case) */
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((nmp = req->r_nmp)) {
			lck_mtx_lock(&nmp->nm_lock);
			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
				/*
				 * It's not going to get off the resend queue if we're in recovery.
				 * So, just take it off ourselves.  We could be holding mount state
				 * busy and thus holding up the start of recovery.
				 */
				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
				req->r_rchain.tqe_next = NFSREQNOLIST;
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		/* for async I/O, the callback will pick the reply up later */
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}
4377
/*
 * Cancel a pending asynchronous NFS request.
 *
 * Dequeues the request from the global request queue (waiting out the
 * timer if it currently has the request busied) and drops the caller's
 * reference.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	/* 0xD1ED1E is a distinctive trace marker for a cancelled request */
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}
4388
4389 /*
4390 * Flag a request as being terminated.
4391 */
4392 void
4393 nfs_softterm(struct nfsreq *req)
4394 {
4395 struct nfsmount *nmp = req->r_nmp;
4396 req->r_flags |= R_SOFTTERM;
4397 req->r_error = ETIMEDOUT;
4398 if (!(req->r_flags & R_CWND) || !nmp)
4399 return;
4400 /* update congestion window */
4401 req->r_flags &= ~R_CWND;
4402 lck_mtx_lock(&nmp->nm_lock);
4403 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
4404 nmp->nm_sent -= NFS_CWNDSCALE;
4405 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
4406 /* congestion window is open, poke the cwnd queue */
4407 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
4408 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
4409 req2->r_cchain.tqe_next = NFSREQNOLIST;
4410 wakeup(req2);
4411 }
4412 lck_mtx_unlock(&nmp->nm_lock);
4413 }
4414
4415 /*
4416 * Ensure req isn't in use by the timer, then dequeue it.
4417 */
4418 void
4419 nfs_reqdequeue(struct nfsreq *req)
4420 {
4421 lck_mtx_lock(nfs_request_mutex);
4422 while (req->r_lflags & RL_BUSY) {
4423 req->r_lflags |= RL_WAITING;
4424 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
4425 }
4426 if (req->r_lflags & RL_QUEUED) {
4427 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
4428 req->r_lflags &= ~RL_QUEUED;
4429 }
4430 lck_mtx_unlock(nfs_request_mutex);
4431 }
4432
/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 *
 * Called with nfs_request_mutex held (see nfs_request_timer /
 * nfs_reqnext).  A request may be busied only once; double-busying
 * is a fatal logic error.
 */
void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}
4444
4445 /*
4446 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
4447 */
4448 struct nfsreq *
4449 nfs_reqnext(struct nfsreq *req)
4450 {
4451 struct nfsreq * nextreq;
4452
4453 if (req == NULL)
4454 return (NULL);
4455 /*
4456 * We need to get and busy the next req before signalling the
4457 * current one, otherwise wakeup() may block us and we'll race to
4458 * grab the next req.
4459 */
4460 nextreq = TAILQ_NEXT(req, r_chain);
4461 if (nextreq != NULL)
4462 nfs_reqbusy(nextreq);
4463 /* unbusy and signal. */
4464 req->r_lflags &= ~RL_BUSY;
4465 if (req->r_lflags & RL_WAITING) {
4466 req->r_lflags &= ~RL_WAITING;
4467 wakeup(&req->r_lflags);
4468 }
4469 return (nextreq);
4470 }
4471
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 *
 * Runs with nfs_request_mutex held across the scan; per-request and
 * per-mount mutexes are taken/released as each entry is examined.
 * Sockets to poke are collected on a local queue and poked only after
 * the request mutex is dropped.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	/* walk the queue; nfs_reqnext() unbusies the current and busies the next */
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		/* skip requests that already failed or already have a reply */
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		/* time to post a "server not responding" console message? */
		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!NMFLAG(nmp, SOFT))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (NMFLAG(nmp, DUMBTIMER))
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				/* apply exponential backoff for repeated timeouts */
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			nfs_mount_check_dead_timeout(nmp);
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
			    (nmp->nm_sockflags & NMSOCK_READY)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			if (req->r_flags & R_NOINTR) {
				/* don't terminate nointr requests on timeout */
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets (deferred until after the request mutex is dropped) */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	/* re-arm the timer for the next scan */
	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}
4685
4686 /*
4687 * check a thread's proc for the "noremotehang" flag.
4688 */
4689 int
4690 nfs_noremotehang(thread_t thd)
4691 {
4692 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
4693 return (p && proc_noremotehang(p));
4694 }
4695
/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
extern int sigprop[NSIG+1];
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	proc_t p;
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */
	if (req && (req->r_flags & R_NOINTR))
		thd = NULL; /* don't check for signal on R_NOINTR */

	/* nmplocked tells us whether the caller already holds nm_lock */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_state & NFSSTA_FORCE) {
		/* If a force unmount is in progress then fail. */
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/* Check if the mount is marked dead. */
	if (!error && (nmp->nm_state & NFSSTA_DEAD))
		error = ENXIO;

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/*
	 * Check if the process is aborted, but don't interrupt if we
	 * were killed by a signal and this is the exiting thread which
	 * is attempting to dump core.
	 */
	if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
	     (p->p_sigacts == NULL) ||
	     (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
	     !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
	    proc_pendingsignals(p, NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}
4768
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 *
 * Returns 0 with NFSSTA_SNDLOCK held, or an error from nfs_sigintr()
 * if the wait was interrupted or the mount went away.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	/* interruptible sleep only for INTR mounts on non-R_NOINTR requests */
	if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		/* bound the sleep so "noremotehang" threads re-check often */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			/* after one catchable sleep, poll with a 2s timeout instead */
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}
4808
4809 /*
4810 * Unlock the stream socket for others.
4811 */
4812 void
4813 nfs_sndunlock(struct nfsreq *req)
4814 {
4815 struct nfsmount *nmp = req->r_nmp;
4816 int *statep, wake = 0;
4817
4818 if (nmp == NULL)
4819 return;
4820 lck_mtx_lock(&nmp->nm_lock);
4821 statep = &nmp->nm_state;
4822 if ((*statep & NFSSTA_SNDLOCK) == 0)
4823 panic("nfs sndunlock");
4824 *statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
4825 if (*statep & NFSSTA_WANTSND) {
4826 *statep &= ~NFSSTA_WANTSND;
4827 wake = 1;
4828 }
4829 lck_mtx_unlock(&nmp->nm_lock);
4830 if (wake)
4831 wakeup(statep);
4832 }
4833
/*
 * Perform a one-off, standalone RPC request/reply exchange (used for
 * portmap/rpcbind lookups and other auxiliary RPCs) outside the normal
 * NFS request machinery.
 *
 * If no socket is passed in (so == NULL), a temporary one is created
 * (optionally bound to a low/reserved local port when "bindresv" is set)
 * and torn down on exit.  Otherwise the caller's socket is used with its
 * send/receive timeouts temporarily forced to one second and restored
 * afterwards.
 *
 * For UDP the request is resent with exponential backoff until a reply
 * arrives or roughly "timeo" seconds elapse; for TCP it is sent once
 * and the reply record is accumulated across reads.
 *
 * On success the parsed reply chain — positioned just past the RPC
 * accepted status — is returned in *nmrep.  "mreq" is consumed in all
 * cases.  Returns 0 or an errno (RPC-level failures are mapped to
 * EBADRPC/ERPCMISMATCH/EACCES/EPROG*/EIO).
 */
int
nfs_aux_request(
	struct nfsmount *nmp,
	thread_t thd,
	struct sockaddr *saddr,
	socket_t so,
	int sotype,
	mbuf_t mreq,
	uint32_t xid,
	int bindresv,
	int timeo,
	struct nfsm_chain *nmrep)
{
	int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
	socket_t newso = NULL;
	struct sockaddr_storage ss;
	struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
	mbuf_t m, mrep = NULL;
	struct msghdr msg;
	uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
	uint32_t verf_type, verf_len, accepted_status;
	size_t readlen, sentlen;
	struct nfs_rpc_record_state nrrs;

	if (!so) {
		/* create socket and set options */
		soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
		if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
			goto nfsmout;

		if (bindresv) {
			/* bind to a local port in the low (reserved) range */
			int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
			int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
			int portrange = IP_PORTRANGE_LOW;
			error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
			nfsmout_if(error);
			ss.ss_len = saddr->sa_len;
			ss.ss_family = saddr->sa_family;
			if (ss.ss_family == AF_INET) {
				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			} else if (ss.ss_family == AF_INET6) {
				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			} else {
				error = EINVAL;
			}
			if (!error)
				error = sock_bind(newso, (struct sockaddr *)&ss);
			nfsmout_if(error);
		}

		if (sotype == SOCK_STREAM) {
			on = 4; /* don't wait too long for the socket to connect */
			sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on));
			error = sock_connect(newso, saddr, 0);
			nfsmout_if(error);
		}
		/* one second send/receive timeouts pace the retry loop below */
		if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
			goto nfsmout;
		so = newso;
	} else {
		/* make sure socket is using a one second timeout in this function */
		optlen = sizeof(orig_rcvto);
		error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
		if (!error) {
			optlen = sizeof(orig_sndto);
			error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
		}
		if (!error) {
			sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
			sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
			restoreto = 1; /* remember to restore the caller's timeouts */
		}
	}

	if (sotype == SOCK_STREAM) {
		sendat = 0; /* we only resend the request for UDP */
		nfs_rpc_record_state_init(&nrrs);
	}

	/* each pass takes ~1 second (socket timeouts), so "timeo" bounds total seconds */
	for (try=0; try < timeo; try++) {
		if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0)))
			break;
		if (!try || (try == sendat)) {
			/* send the request (resending periodically for UDP) */
			if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
				goto nfsmout;
			bzero(&msg, sizeof(msg));
			if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
				msg.msg_name = saddr;
				msg.msg_namelen = saddr->sa_len;
			}
			if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen)))
				goto nfsmout;
			/* exponential backoff on resends, capped at 30 seconds */
			sendat *= 2;
			if (sendat > 30)
				sendat = 30;
		}
		/* wait for the response */
		if (sotype == SOCK_STREAM) {
			/* try to read (more of) record */
			error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
			/* if we don't have the whole record yet, we'll keep trying */
		} else {
			readlen = 1<<18;
			bzero(&msg, sizeof(msg));
			error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
		}
		if (error == EWOULDBLOCK)
			continue;
		nfsmout_if(error);
		/* parse the response */
		nfsm_chain_dissect_init(error, nmrep, mrep);
		nfsm_chain_get_32(error, nmrep, rxid);
		nfsm_chain_get_32(error, nmrep, reply);
		nfsmout_if(error);
		if ((rxid != xid) || (reply != RPC_REPLY))
			error = EBADRPC;
		nfsm_chain_get_32(error, nmrep, reply_status);
		nfsmout_if(error);
		if (reply_status == RPC_MSGDENIED) {
			nfsm_chain_get_32(error, nmrep, rejected_status);
			nfsmout_if(error);
			error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
		nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
		nfsmout_if(error);
		if (verf_len)
			nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, nmrep, accepted_status);
		nfsmout_if(error);
		/* map the RPC accepted status to an errno */
		switch (accepted_status) {
		case RPC_SUCCESS:
			error = 0;
			break;
		case RPC_PROGUNAVAIL:
			error = EPROGUNAVAIL;
			break;
		case RPC_PROGMISMATCH:
			error = EPROGMISMATCH;
			break;
		case RPC_PROCUNAVAIL:
			error = EPROCUNAVAIL;
			break;
		case RPC_GARBAGE:
			error = EBADRPC;
			break;
		case RPC_SYSTEM_ERR:
		default:
			error = EIO;
			break;
		}
		break;
	}
nfsmout:
	if (restoreto) {
		sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv));
		sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv));
	}
	if (newso) {
		sock_shutdown(newso, SHUT_RDWR);
		sock_close(newso);
	}
	mbuf_freem(mreq);
	return (error);
}
5005
/*
 * Look up the port for an RPC service on the remote host using its
 * portmapper (IPv4: PMAP v2 GETPORT) or rpcbind (IPv6: RPCB v4
 * GETVERSADDR, falling back to v3 GETADDR if the server doesn't
 * support v4).  On success the port in "sa" is updated in place.
 */
int
nfs_portmap_lookup(
	struct nfsmount *nmp,
	vfs_context_t ctx,
	struct sockaddr *sa,
	socket_t so,
	uint32_t protocol,
	uint32_t vers,
	uint32_t ipproto,
	int timeo)
{
	thread_t thd = vfs_context_thread(ctx);
	kauth_cred_t cred = vfs_context_ucred(ctx);
	struct sockaddr_storage ss;
	struct sockaddr *saddr = (struct sockaddr*)&ss;
	struct nfsm_chain nmreq, nmrep;
	mbuf_t mreq;
	int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0;
	uint32_t port;
	uint64_t xid = 0;
	char uaddr[MAX_IPv6_STR_LEN+16];

	/* work on a local copy of the address so "sa" is only touched on success */
	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
	if (saddr->sa_family == AF_INET) {
		ip = 4;
		pmprog = PMAPPROG;
		pmvers = PMAPVERS;
		pmproc = PMAPPROC_GETPORT;
	} else if (saddr->sa_family == AF_INET6) {
		ip = 6;
		pmprog = RPCBPROG;
		pmvers = RPCBVERS4;
		pmproc = RPCBPROC_GETVERSADDR;
	} else {
		return (EINVAL);
	}
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

tryagain:
	/* send portmapper request to get port/uaddr */
	if (ip == 4)
		((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT);
	else
		((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT);
	nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED);
	nfsm_chain_add_32(error, &nmreq, protocol);
	nfsm_chain_add_32(error, &nmreq, vers);
	if (ip == 4) {
		/* PMAP GETPORT args: prog, vers, prot, port(=0) */
		nfsm_chain_add_32(error, &nmreq, ipproto);
		nfsm_chain_add_32(error, &nmreq, 0);
	} else {
		/* RPCB args: prog, vers, netid, uaddr, owner */
		if (ipproto == IPPROTO_TCP)
			nfsm_chain_add_string(error, &nmreq, "tcp6", 4);
		else
			nfsm_chain_add_string(error, &nmreq, "udp6", 4);
		nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */
		nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
			pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead,
			&xid, &mreq);
	nfsmout_if(error);
	nmreq.nmc_mhead = NULL;
	error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
			mreq, R_XID32(xid), 0, timeo, &nmrep);

	/* grab port from portmap response */
	if (ip == 4) {
		nfsm_chain_get_32(error, &nmrep, port);
		if (!error)
			((struct sockaddr_in*)sa)->sin_port = htons(port);
	} else {
		/* get uaddr string and convert to sockaddr */
		nfsm_chain_get_32(error, &nmrep, ualen);
		if (!error) {
			if (ualen > ((int)sizeof(uaddr)-1))
				error = EIO;
			if (ualen < 1) {
				/* program is not available, just return a zero port */
				bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
				((struct sockaddr_in6*)saddr)->sin6_port = htons(0);
			} else {
				nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr);
				if (!error) {
					uaddr[ualen] = '\0';
					if (!nfs_uaddr2sockaddr(uaddr, saddr))
						error = EIO;
				}
			}
		}
		if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) {
			/* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
			if (pmvers == RPCBVERS4) {
				/* fall back to v3 and GETADDR */
				pmvers = RPCBVERS3;
				pmproc = RPCBPROC_GETADDR;
				nfsm_chain_cleanup(&nmreq);
				nfsm_chain_cleanup(&nmrep);
				bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
				xid = 0;
				error = 0;
				goto tryagain;
			}
		}
		if (!error)
			bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len));
	}
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
5121
5122 int
5123 nfs_msg(thread_t thd,
5124 const char *server,
5125 const char *msg,
5126 int error)
5127 {
5128 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
5129 tpr_t tpr;
5130
5131 if (p)
5132 tpr = tprintf_open(p);
5133 else
5134 tpr = NULL;
5135 if (error)
5136 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
5137 else
5138 tprintf(tpr, "nfs server %s: %s\n", server, msg);
5139 tprintf_close(tpr);
5140 return (0);
5141 }
5142
5143 void
5144 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
5145 {
5146 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5147 uint32_t do_vfs_signal;
5148 struct timeval now;
5149
5150 if (nmp == NULL)
5151 return;
5152
5153 lck_mtx_lock(&nmp->nm_lock);
5154
5155 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5156 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5157 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5158 wasunresponsive = (nmp->nm_state & timeoutmask);
5159
5160 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5161 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5162
5163 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
5164 nmp->nm_state |= NFSSTA_TIMEO;
5165 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
5166 nmp->nm_state |= NFSSTA_LOCKTIMEO;
5167 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5168 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
5169
5170 unresponsive = (nmp->nm_state & timeoutmask);
5171
5172 if (unresponsive && (nmp->nm_deadtimeout > 0)) {
5173 microuptime(&now);
5174 if (!wasunresponsive) {
5175 nmp->nm_deadto_start = now.tv_sec;
5176 nfs_mount_sock_thread_wake(nmp);
5177 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_deadtimeout) {
5178 if (!(nmp->nm_state & NFSSTA_DEAD))
5179 printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
5180 nmp->nm_state |= NFSSTA_DEAD;
5181 }
5182 }
5183 lck_mtx_unlock(&nmp->nm_lock);
5184
5185 if (nmp->nm_state & NFSSTA_DEAD)
5186 do_vfs_signal = VQ_DEAD;
5187 else if (softnobrowse || wasunresponsive || !unresponsive)
5188 do_vfs_signal = 0;
5189 else
5190 do_vfs_signal = VQ_NOTRESP;
5191 if (do_vfs_signal)
5192 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);
5193
5194 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
5195 }
5196
5197 void
5198 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
5199 {
5200 int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
5201 int do_vfs_signal;
5202
5203 if (nmp == NULL)
5204 return;
5205
5206 if (msg)
5207 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
5208
5209 lck_mtx_lock(&nmp->nm_lock);
5210
5211 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5212 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
5213 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
5214 wasunresponsive = (nmp->nm_state & timeoutmask);
5215
5216 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5217 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5218
5219 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
5220 nmp->nm_state &= ~NFSSTA_TIMEO;
5221 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
5222 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5223 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
5224 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
5225
5226 unresponsive = (nmp->nm_state & timeoutmask);
5227
5228 if (nmp->nm_deadto_start)
5229 nmp->nm_deadto_start = 0;
5230 lck_mtx_unlock(&nmp->nm_lock);
5231
5232 if (softnobrowse)
5233 do_vfs_signal = 0;
5234 else
5235 do_vfs_signal = (wasunresponsive && !unresponsive);
5236 if (do_vfs_signal)
5237 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
5238 }
5239
5240
5241 #endif /* NFSCLIENT */
5242
5243 #if NFSSERVER
5244
/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 *
 * Builds the RPC reply header for the request described by "nd" into a
 * freshly-allocated mbuf and hands the initialized chain back in
 * *nmrepp.  Handles RPC-level denials (auth errors, RPC version
 * mismatch) as well as accepted-status errors (prog/proc unavailable,
 * garbage args) and the RPCSEC_GSS verifier.  Returns 0 on success, or
 * an errno if the mbuf couldn't be allocated or the header couldn't be
 * composed (in which case the mbuf is freed).
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_int32_t *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		/* RPC-level rejection: authentication error or RPC version mismatch */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			/* low/high supported RPC version (both RPC_VER2) */
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, &nfsstats.srvrpc_errs);
	return (0);
}
5356
5357 /*
5358 * The nfs server send routine.
5359 *
5360 * - return EINTR or ERESTART if interrupted by a signal
5361 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
5362 * - do any cleanup required by recoverable socket errors (???)
5363 */
5364 int
5365 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
5366 {
5367 int error;
5368 socket_t so = slp->ns_so;
5369 struct sockaddr *sendnam;
5370 struct msghdr msg;
5371
5372 bzero(&msg, sizeof(msg));
5373 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
5374 if ((sendnam = mbuf_data(nam))) {
5375 msg.msg_name = (caddr_t)sendnam;
5376 msg.msg_namelen = sendnam->sa_len;
5377 }
5378 }
5379 error = sock_sendmbuf(so, &msg, top, 0, NULL);
5380 if (!error)
5381 return (0);
5382 log(LOG_INFO, "nfsd send error %d\n", error);
5383
5384 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
5385 error = EPIPE; /* zap TCP sockets if they time out on send */
5386
5387 /* Handle any recoverable (soft) socket errors here. (???) */
5388 if (error != EINTR && error != ERESTART && error != EIO &&
5389 error != EWOULDBLOCK && error != EPIPE)
5390 error = 0;
5391
5392 return (error);
5393 }
5394
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, void *arg, int waitflag)
{
	struct nfsrv_sock *slp = arg;

	/* ignore upcalls if there are no nfsd threads or the socket is no longer valid */
	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}
/*
 * Receive data on an nfsd socket.  Caller holds ns_rwlock exclusive.
 *
 * For stream (TCP) sockets, raw data is appended to the socket's
 * ns_raw chain and then parsed into RPC records via nfsrv_getstream().
 * For datagram (UDP) sockets, each datagram (with the sender's address
 * prepended as a SONAME mbuf) becomes one record on the ns_rec queue.
 *
 * Note: when called with MBUF_DONTWAIT, ns_rwlock is dropped before
 * returning, and an nfsd is woken if there is work queued.
 */
void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		/* socket went invalid; still honor the lock-drop contract */
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an(other) nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		/* append the newly-received data to the raw stream chain */
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		/* track the new tail of the raw chain */
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		/* drain all pending datagrams; each becomes one record */
		do {
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				/* prepend the sender's address as a SONAME mbuf */
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				/* queue the datagram on the record (packet) chain */
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		/* drop the lock; wake an nfsd if there's work to do */
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
5541
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Repeatedly pulls the 4-byte RPC record mark off the raw stream
 * (ns_raw/ns_cc), then splits off that many bytes as a record fragment.
 * Fragments accumulate on ns_frag until the last-fragment bit is seen,
 * at which point the completed record is moved to the ns_rec queue.
 *
 * Returns 0 when there is no more complete data to process, EPERM on a
 * bogus record mark (the caller treats this as a disconnect), or
 * EWOULDBLOCK on mbuf allocation/linkage trouble.
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	/* only one thread may parse this socket's stream at a time */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/* need the next 4-byte record mark */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/* record mark straddles mbufs: gather it byte by byte */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			/* high bit of the record mark flags the last fragment */
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* raw data is exactly one fragment: take it all */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* more data than one fragment: split the chain */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/* fragment ends mid-mbuf: copy its head, keep the tail */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* fragment ends exactly at an mbuf boundary */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* whole mbuf belongs to the fragment; keep walking */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* don't have a full fragment's worth of data yet */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* record complete: move it onto the request queue */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
5691
/*
 * Parse an RPC header.
 *
 * Dequeue the next request record from the socket's ns_rec queue,
 * allocate an nfsrv_descript for it, split off any prepended SONAME
 * mbuf (the UDP sender's address), and parse the RPC header via
 * nfsrv_getreq().  On success the descriptor is returned in *ndp and
 * attached to the nfsd.  Returns ENOBUFS when nothing is queued,
 * ENOMEM when the descriptor can't be allocated, or the error from
 * nfsrv_getreq() (in which case everything is cleaned up).
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	/* dequeue the next record from the socket */
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	/* a leading SONAME mbuf carries the (UDP) sender's address */
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		/* couldn't parse the request: release everything */
		if (nam)
			mbuf_freem(nam);
		if (nd->nd_gss_context)
			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
5746
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * Dissects the RPC call header out of nd->nd_nmreq: XID, RPC version,
 * program, NFS version, procedure, and credentials (AUTH_SYS or
 * RPCSEC_GSS).  Protocol-level problems (bad version, unknown program
 * or procedure, rejected credential) are reported by setting
 * nd->nd_repstat and nd->nd_procnum = NFSPROC_NOOP and returning 0 so
 * that an error reply gets sent; a non-zero return (e.g. EBADRPC on a
 * malformed header) means the request should be dropped entirely.
 */
int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val); // RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val); // RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* map v2 procedure numbers onto the v3 procedure space */
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
	nfsm_chain_get_32(error, nmreq, len); // Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_SYS) {
		struct posix_cred temp_pcred;
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_SYS;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
		nfsm_chain_get_32(error, nmreq, len); // hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_pcred, sizeof(temp_pcred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_pcred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len); // extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		/* keep up to NGROUPS-1 extra GIDs; skip over the rest */
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len); // verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_pcred.cr_uid = user_id;
		temp_pcred.cr_ngroups = ngroups;
		nd->nd_cr = posix_cred_create(&temp_pcred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout; // drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}
5887
5888 /*
5889 * Search for a sleeping nfsd and wake it up.
5890 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
5891 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
5892 * Note: Must be called with nfsd_mutex held.
5893 */
5894 void
5895 nfsrv_wakenfsd(struct nfsrv_sock *slp)
5896 {
5897 struct nfsd *nd;
5898
5899 if ((slp->ns_flag & SLP_VALID) == 0)
5900 return;
5901
5902 lck_rw_lock_exclusive(&slp->ns_rwlock);
5903 /* if there's work to do on this socket, make sure it's queued up */
5904 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
5905 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
5906 slp->ns_flag |= SLP_WAITQ;
5907 }
5908 lck_rw_done(&slp->ns_rwlock);
5909
5910 /* wake up a waiting nfsd, if possible */
5911 nd = TAILQ_FIRST(&nfsd_queue);
5912 if (!nd)
5913 return;
5914
5915 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
5916 nd->nfsd_flag &= ~NFSD_WAITING;
5917 wakeup(nd);
5918 }
5919
5920 #endif /* NFSSERVER */
5921