bsd/nfs/nfs_socket.c (xnu-1504.15.3)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1991, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
66 */
67
68 /*
69 * Socket operations for use by nfs
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75 #include <sys/kauth.h>
76 #include <sys/mount_internal.h>
77 #include <sys/kernel.h>
78 #include <sys/kpi_mbuf.h>
79 #include <sys/malloc.h>
80 #include <sys/vnode.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/socket.h>
84 #include <sys/syslog.h>
85 #include <sys/tprintf.h>
86 #include <libkern/OSAtomic.h>
87
88 #include <sys/time.h>
89 #include <kern/clock.h>
90 #include <kern/task.h>
91 #include <kern/thread.h>
92 #include <kern/thread_call.h>
93 #include <sys/user.h>
94
95 #include <netinet/in.h>
96 #include <netinet/tcp.h>
97
98 #include <nfs/rpcv2.h>
99 #include <nfs/nfsproto.h>
100 #include <nfs/nfs.h>
101 #include <nfs/xdr_subs.h>
102 #include <nfs/nfsm_subs.h>
103 #include <nfs/nfs_gss.h>
104 #include <nfs/nfsmount.h>
105 #include <nfs/nfsnode.h>
106
107 /* XXX */
108 boolean_t current_thread_aborted(void);
109 kern_return_t thread_terminate(thread_t);
110
111
112 #if NFSSERVER
113 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
114
115 int nfsrv_getstream(struct nfsrv_sock *,int);
116 int nfsrv_getreq(struct nfsrv_descript *);
117 extern int nfsv3_procid[NFS_NPROCS];
118 #endif /* NFSSERVER */
119
120 #if NFSCLIENT
121
122 int nfs_reconnect(struct nfsmount *);
123 int nfs_connect_setup(struct nfsmount *);
124 void nfs_mount_sock_thread(void *, wait_result_t);
125 void nfs_udp_rcv(socket_t, void*, int);
126 void nfs_tcp_rcv(socket_t, void*, int);
127 void nfs_sock_poke(struct nfsmount *);
128 void nfs_request_match_reply(struct nfsmount *, mbuf_t);
129 void nfs_reqdequeue(struct nfsreq *);
130 void nfs_reqbusy(struct nfsreq *);
131 struct nfsreq *nfs_reqnext(struct nfsreq *);
132 int nfs_wait_reply(struct nfsreq *);
133 void nfs_softterm(struct nfsreq *);
134
135 #ifdef NFS_SOCKET_DEBUGGING
136 #define NFS_SOCK_DBG(X) printf X
137 #else
138 #define NFS_SOCK_DBG(X)
139 #endif
140
141 /*
142 * Estimate the RTO for an NFS RPC sent via an unreliable datagram.
143 * For the frequent RPC types, use the smoothed mean and mean deviation
144 * of the RTT; use a default for the others.
145 * The justification for handling "other" this way is that these RPCs
146 * happen so infrequently that a timer estimate would probably be stale.
147 * Also, since many of these RPCs are
148 * non-idempotent, a conservative timeout is desired.
149 * getattr, lookup - A+2D
150 * read, write - A+4D
151 * other - nm_timeo
152 */
153 #define NFS_RTO(n, t) \
154 ((t) == 0 ? (n)->nm_timeo : \
155 ((t) < 3 ? \
156 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
157 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
158 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
159 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
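/*
 * Editorial note (not in the original source): a worked example of the
 * NFS_RTO arithmetic above. It assumes the usual Jacobson-style scaling
 * that the initialization code below uses, i.e. nm_srtt[] holds 8x the
 * smoothed RTT (see "NFS_TIMEO << 3" in nfs_connect()) and nm_sdrtt[]
 * holds 4x the mean deviation, so the two branches reduce to roughly
 * A+2D and A+4D ticks as the comment promises. A minimal, standalone
 * check of that arithmetic with hypothetical values A=16, D=4:
 */
#if 0
#include <stdio.h>

int
main(void)
{
	int srtt = 8 * 16;	/* scaled smoothed RTT: 8*A, A = 16 ticks */
	int sdrtt = 4 * 4;	/* scaled mean deviation: 4*D, D = 4 ticks */
	/* getattr/lookup branch (timer type < 3): ~A + 2D */
	int rto_getattr = ((((srtt + 3) >> 2) + sdrtt + 1) >> 1);
	/* read/write branch (timer type >= 3): ~A + 4D */
	int rto_read = (((srtt + 7) >> 3) + sdrtt + 1);
	printf("getattr: %d (A+2D = 24), read: %d (~A+4D = 32)\n",
	    rto_getattr, rto_read);
	return (0);
}
#endif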
160
161 /*
162 * Defines which timer to use for the procnum.
163 * 0 - default
164 * 1 - getattr
165 * 2 - lookup
166 * 3 - read
167 * 4 - write
168 */
169 static int proct[NFS_NPROCS] = {
170 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
171 };
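/*
 * Editorial note (not in the original source): assuming the NFSv3
 * procedure numbering (e.g. NFSPROC_GETATTR == 1, NFSPROC_LOOKUP == 3,
 * NFSPROC_READ == 6, NFSPROC_WRITE == 7), the table above reads
 * proct[NFSPROC_GETATTR] == 1 and proct[NFSPROC_READ] == 3, matching the
 * timer types listed in the comment; NFS_SRTT()/NFS_SDRTT() above index
 * nm_srtt[]/nm_sdrtt[] with proct[procnum] - 1.
 */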
172
173 /*
174 * There is a congestion window for outstanding RPCs, maintained per mount
175 * point. The cwnd size is adjusted roughly in the way that
176 * Van Jacobson, "Congestion Avoidance and Control", in Proceedings of
177 * SIGCOMM '88, ACM, August 1988,
178 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
179 * and incremented by 1/cwnd each time an RPC reply is received while a full
180 * cwnd of RPCs is in progress.
181 * (The sent count and cwnd are scaled for integer arithmetic.)
182 * Variants of "slow start" were tried and found to be too much of a
183 * performance hit (average RTT 3 times larger);
184 * I suspect due to the large RTT that NFS RPCs have.
185 */
186 #define NFS_CWNDSCALE 256
187 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
188 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
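/*
 * Editorial note (not in the original source): with NFS_CWNDSCALE 256 and
 * NFS_MAXCWND (256 * 32), the window admits at most 32 in-flight requests.
 * The nfs_backoff[] table supplies the per-retransmit timeout multipliers
 * used by the request timer (not shown in this excerpt). Below is a hedged,
 * userspace-only model of the window adjustments described above; the
 * names and the reply-side growth formula follow the classic BSD NFS
 * scheme, so treat this as an illustrative sketch rather than the
 * authoritative implementation:
 */
#if 0
#define NFS_CWNDSCALE	256
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)

static int cwnd = NFS_MAXCWND / 2;	/* initial send window */
static int sent = 0;			/* scaled count of in-flight RPCs */

static void
on_first_send(void)			/* first transmit of a request */
{
	sent += NFS_CWNDSCALE;
}

static void
on_reply(void)				/* reply received */
{
	if (cwnd <= sent) {
		/* grow by ~1/cwnd per reply while a full window is out */
		cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > NFS_MAXCWND)
			cwnd = NFS_MAXCWND;
	}
	sent -= NFS_CWNDSCALE;
}

static void
on_rexmit_timeout(void)			/* retransmit timeout */
{
	cwnd >>= 1;			/* chop the window in half */
	if (cwnd < NFS_CWNDSCALE)
		cwnd = NFS_CWNDSCALE;
}
#endif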
189
190 /*
191 * Initialize socket state and perform setup for a new NFS connection.
192 */
193 int
194 nfs_connect(struct nfsmount *nmp, int verbose)
195 {
196 socket_t so;
197 int error, on = 1, proto;
198 sock_upcall upcall;
199 struct sockaddr *saddr;
200 struct sockaddr_in sin;
201 struct timeval timeo;
202
203 lck_mtx_lock(&nmp->nm_lock);
204 nmp->nm_sockflags |= NMSOCK_CONNECTING;
205 saddr = mbuf_data(nmp->nm_nam);
206 upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
207 lck_mtx_unlock(&nmp->nm_lock);
208 error = sock_socket(saddr->sa_family, nmp->nm_sotype,
209 nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
210 if (error)
211 goto bad;
212 lck_mtx_lock(&nmp->nm_lock);
213 so = nmp->nm_so;
214
215 /*
216 * Some servers require that the client port be a reserved port number.
217 */
218 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
219 int portrange = IP_PORTRANGE_LOW;
220 error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
221 if (!error) { /* bind now to check for failure */
222 sin.sin_len = sizeof (struct sockaddr_in);
223 sin.sin_family = AF_INET;
224 sin.sin_addr.s_addr = INADDR_ANY;
225 sin.sin_port = 0;
226 error = sock_bind(so, (struct sockaddr *) &sin);
227 }
228 if (error) {
229 lck_mtx_unlock(&nmp->nm_lock);
230 goto bad;
231 }
232 }
233
234 /*
235 * Protocols that do not require connections may be optionally left
236 * unconnected for servers that reply from a different address/port.
237 */
238 if (nmp->nm_flag & NFSMNT_NOCONN) {
239 if (nmp->nm_sotype == SOCK_STREAM) {
240 error = ENOTCONN;
241 lck_mtx_unlock(&nmp->nm_lock);
242 goto bad;
243 }
244 } else {
245 int tocnt = 0, optlen = sizeof(error);
246 struct timespec ts = { 1, 0 };
247
248 lck_mtx_unlock(&nmp->nm_lock);
249 error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
250 if (error && (error != EINPROGRESS))
251 goto bad;
252 lck_mtx_lock(&nmp->nm_lock);
253 while (!sock_isconnected(so)) {
254 nfs_mount_check_dead_timeout(nmp);
255 if ((tocnt++ == 30) && verbose) /* log a warning if connect is taking a while */
256 log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
257 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
258 /* check for error on socket */
259 sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
260 if (error) {
261 if (verbose)
262 log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
263 error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
264 break;
265 }
266 /* abort if this is taking too long or we're unmounting */
267 if ((tocnt > 120) || (nmp->nm_sockflags & NMSOCK_UNMOUNT)) {
268 error = ENOTCONN;
269 break;
270 }
271 if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
272 break;
273 msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
274 }
275 if ((tocnt > 30) && verbose)
276 log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
277 error ? "aborted" : "completed",
278 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
279 if (error) {
280 lck_mtx_unlock(&nmp->nm_lock);
281 goto bad;
282 }
283 }
284
285 /*
286 * Set socket send/receive timeouts
287 * - Receive timeout shouldn't matter because all receives are performed
288 * non-blocking, in the socket upcall.
289 * - Send timeout should allow us to react to a blocked socket.
290 * Soft mounts will want to abort sooner.
291 */
292 timeo.tv_usec = 0;
293 timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
294 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
295 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
296 if (error) {
297 log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
298 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
299 error = 0;
300 }
301
302 if (nmp->nm_sotype == SOCK_STREAM) {
303 /* Assume that SOCK_STREAM always requires a connection */
304 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
305 /* set nodelay for TCP */
306 sock_gettype(so, NULL, NULL, &proto);
307 if (proto == IPPROTO_TCP)
308 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
309 }
310
311 if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
312 int reserve = NFS_UDPSOCKBUF;
313 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
314 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
315 if (error) {
316 log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
317 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
318 error = 0;
319 }
320 }
321
322 /* set SO_NOADDRERR to detect network changes ASAP */
323 error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
324 if (error) {
325 lck_mtx_unlock(&nmp->nm_lock);
326 goto bad;
327 }
328 /* just playin' it safe */
329 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
330
331 if (!(nmp->nm_flag & NFSMNT_INT))
332 sock_nointerrupt(so, 1);
333
334 /* Initialize socket state variables */
335 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
336 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
337 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
338 nmp->nm_sdrtt[3] = 0;
339 if (nmp->nm_sotype == SOCK_DGRAM) {
340 /* XXX do we really want to reset this on each reconnect? */
341 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
342 nmp->nm_sent = 0;
343 } else if (nmp->nm_sotype == SOCK_STREAM) {
344 nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
345 nmp->nm_fragleft = nmp->nm_reclen = 0;
346 nmp->nm_timeouts = 0;
347 }
348 nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
349 nmp->nm_sockflags |= NMSOCK_SETUP;
350 FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
351 lck_mtx_unlock(&nmp->nm_lock);
352 error = nfs_connect_setup(nmp);
353 bad:
354 lck_mtx_lock(&nmp->nm_lock);
355 nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
356 if (!error) {
357 nmp->nm_sockflags |= NMSOCK_READY;
358 wakeup(&nmp->nm_sockflags);
359 }
360 lck_mtx_unlock(&nmp->nm_lock);
361 return (error);
362 }
363
364 /* set up and confirm that the socket connection is functional */
365 int
366 nfs_connect_setup(struct nfsmount *nmp)
367 {
368 struct nfsm_chain nmreq, nmrep;
369 int error = 0, status;
370 u_int64_t xid;
371
372 if (nmp->nm_vers >= NFS_VER4) {
373 error = nfs4_setclientid(nmp);
374 if (error)
375 return (error);
376 error = nfs4_renew(nmp, R_SETUP);
377 if ((error == NFSERR_ADMIN_REVOKED) ||
378 (error == NFSERR_EXPIRED) ||
379 (error == NFSERR_LEASE_MOVED) ||
380 (error == NFSERR_STALE_CLIENTID)) {
381 lck_mtx_lock(&nmp->nm_lock);
382 nmp->nm_state |= NFSSTA_RECOVER;
383 lck_mtx_unlock(&nmp->nm_lock);
384 }
385 } else {
386 /* verify connection's OK by sending a NULL request */
387 nfsm_chain_null(&nmreq);
388 nfsm_chain_null(&nmrep);
389 nfsm_chain_build_alloc_init(error, &nmreq, 0);
390 nfsm_chain_build_done(error, &nmreq);
391 nfsmout_if(error);
392 error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
393 current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
394 if (!error)
395 error = status;
396 nfsmout:
397 nfsm_chain_cleanup(&nmreq);
398 nfsm_chain_cleanup(&nmrep);
399 }
400 return (error);
401 }
402
403 /*
404 * NFS socket reconnect routine:
405 * Called when a connection is broken.
406 * - disconnect the old socket
407 * - nfs_connect() again
408 * - set R_MUSTRESEND for all outstanding requests on mount point
409 * If this fails the mount point is DEAD!
410 */
411 int
412 nfs_reconnect(struct nfsmount *nmp)
413 {
414 struct nfsreq *rq;
415 struct timeval now;
416 thread_t thd = current_thread();
417 int error, wentdown = 0, verbose = 1;
418 time_t lastmsg;
419
420 microuptime(&now);
421 lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);
422
423 nfs_disconnect(nmp);
424
425 while ((error = nfs_connect(nmp, verbose))) {
426 verbose = 0;
427 nfs_disconnect(nmp);
428 if (error == EINTR || error == ERESTART)
429 return (EINTR);
430 if (error == EIO)
431 return (EIO);
432 microuptime(&now);
433 if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
434 lastmsg = now.tv_sec;
435 nfs_down(nmp, thd, error, NFSSTA_TIMEO, "cannot connect");
436 wentdown = 1;
437 }
438 lck_mtx_lock(&nmp->nm_lock);
439 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
440 /* we're not yet completely mounted and */
441 /* we can't reconnect, so we fail */
442 lck_mtx_unlock(&nmp->nm_lock);
443 return (error);
444 }
445 nfs_mount_check_dead_timeout(nmp);
446 if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
447 lck_mtx_unlock(&nmp->nm_lock);
448 return (error);
449 }
450 lck_mtx_unlock(&nmp->nm_lock);
451 tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
452 if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
453 return (error);
454 }
455
456 if (wentdown)
457 nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");
458
459 /*
460 * Loop through outstanding request list and mark all requests
461 * as needing a resend. (Though nfs_need_reconnect() probably
462 * marked them all already.)
463 */
464 lck_mtx_lock(nfs_request_mutex);
465 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
466 if (rq->r_nmp == nmp) {
467 lck_mtx_lock(&rq->r_mtx);
468 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
469 rq->r_flags |= R_MUSTRESEND;
470 rq->r_rtt = -1;
471 wakeup(rq);
472 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
473 nfs_asyncio_resend(rq);
474 }
475 lck_mtx_unlock(&rq->r_mtx);
476 }
477 }
478 lck_mtx_unlock(nfs_request_mutex);
479 return (0);
480 }
481
482 /*
483 * NFS disconnect. Clean up and unlink.
484 */
485 void
486 nfs_disconnect(struct nfsmount *nmp)
487 {
488 socket_t so;
489
490 lck_mtx_lock(&nmp->nm_lock);
491 if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
492 mbuf_freem(nmp->nm_m);
493 nmp->nm_m = nmp->nm_mlast = NULL;
494 }
495 if (nmp->nm_so) {
496 so = nmp->nm_so;
497 nmp->nm_so = NULL;
498 lck_mtx_unlock(&nmp->nm_lock);
499 sock_shutdown(so, SHUT_RDWR);
500 sock_close(so);
501 } else {
502 lck_mtx_unlock(&nmp->nm_lock);
503 }
504 }
505
506 /*
507 * mark an NFS mount as needing a reconnect and resends.
508 */
509 void
510 nfs_need_reconnect(struct nfsmount *nmp)
511 {
512 struct nfsreq *rq;
513
514 lck_mtx_lock(&nmp->nm_lock);
515 nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
516 lck_mtx_unlock(&nmp->nm_lock);
517
518 /*
519 * Loop through outstanding request list and
520 * mark all requests as needing a resend.
521 */
522 lck_mtx_lock(nfs_request_mutex);
523 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
524 if (rq->r_nmp == nmp) {
525 lck_mtx_lock(&rq->r_mtx);
526 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
527 rq->r_flags |= R_MUSTRESEND;
528 rq->r_rtt = -1;
529 wakeup(rq);
530 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
531 nfs_asyncio_resend(rq);
532 }
533 lck_mtx_unlock(&rq->r_mtx);
534 }
535 }
536 lck_mtx_unlock(nfs_request_mutex);
537 }
538
539 /*
540 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
541 */
542 void
543 nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
544 {
545 struct nfsmount *nmp = arg;
546 struct timespec ts = { 30, 0 };
547 thread_t thd = current_thread();
548 struct nfsreq *req;
549 struct timeval now;
550 int error, dofinish, force;
551 nfsnode_t np;
552 fhandle_t fh;
553 nfs_stateid dstateid;
554
555 lck_mtx_lock(&nmp->nm_lock);
556
557 while (!(nmp->nm_sockflags & NMSOCK_READY) ||
558 !TAILQ_EMPTY(&nmp->nm_resendq) ||
559 nmp->nm_deadto_start ||
560 ((nmp->nm_vers >= NFS_VER4) &&
561 ((nmp->nm_state & NFSSTA_RECOVER) || !TAILQ_EMPTY(&nmp->nm_recallq))))
562 {
563 if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
564 break;
565 force = (nmp->nm_state & NFSSTA_FORCE);
566 /* do reconnect, if necessary */
567 if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
568 if (nmp->nm_reconnect_start <= 0) {
569 microuptime(&now);
570 nmp->nm_reconnect_start = now.tv_sec;
571 }
572 lck_mtx_unlock(&nmp->nm_lock);
573 NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
574 if (nfs_reconnect(nmp) == 0)
575 nmp->nm_reconnect_start = 0;
576 lck_mtx_lock(&nmp->nm_lock);
577 }
578 if ((nmp->nm_sockflags & NMSOCK_READY) &&
579 (nmp->nm_state & NFSSTA_RECOVER) &&
580 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && !force) {
581 /* perform state recovery */
582 lck_mtx_unlock(&nmp->nm_lock);
583 nfs4_recover(nmp);
584 lck_mtx_lock(&nmp->nm_lock);
585 }
586 /* handle NFSv4 delegation recalls */
587 while ((nmp->nm_vers >= NFS_VER4) && !force &&
588 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
589 ((np = TAILQ_FIRST(&nmp->nm_recallq)))) {
590 TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink);
591 np->n_dlink.tqe_next = NFSNOLIST;
592 lck_mtx_unlock(&nmp->nm_lock);
593 lck_mtx_lock(&np->n_openlock);
594 dstateid = np->n_dstateid;
595 if (np->n_openflags & N_DELEG_MASK) {
596 fh.fh_len = np->n_fhsize;
597 bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
598 np->n_openflags &= ~N_DELEG_MASK;
599 lck_mtx_unlock(&np->n_openlock);
600 nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, nmp->nm_mcred);
601 } else {
602 lck_mtx_unlock(&np->n_openlock);
603 }
604 lck_mtx_lock(&nmp->nm_lock);
605 }
606 /* do resends, if necessary/possible */
607 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || force) &&
608 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
609 if (req->r_resendtime)
610 microuptime(&now);
611 while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
612 req = TAILQ_NEXT(req, r_rchain);
613 if (!req)
614 break;
615 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
616 req->r_rchain.tqe_next = NFSREQNOLIST;
617 lck_mtx_unlock(&nmp->nm_lock);
618 lck_mtx_lock(&req->r_mtx);
619 if (req->r_error || req->r_nmrep.nmc_mhead) {
620 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
621 req->r_flags &= ~R_RESENDQ;
622 wakeup(req);
623 lck_mtx_unlock(&req->r_mtx);
624 if (dofinish)
625 nfs_asyncio_finish(req);
626 lck_mtx_lock(&nmp->nm_lock);
627 continue;
628 }
629 if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
630 req->r_flags &= ~R_RESTART;
631 req->r_resendtime = 0;
632 lck_mtx_unlock(&req->r_mtx);
633 /* async RPCs on GSS mounts need to be rebuilt and resent. */
634 nfs_reqdequeue(req);
635 if (req->r_gss_ctx) {
636 nfs_gss_clnt_rpcdone(req);
637 error = nfs_gss_clnt_args_restore(req);
638 if (error == ENEEDAUTH)
639 req->r_xid = 0;
640 }
641 NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
642 req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
643 req->r_flags, req->r_rtt));
644 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */
645 if (!error)
646 error = nfs_sigintr(nmp, req, req->r_thread, 0);
647 if (!error)
648 error = nfs_request_add_header(req);
649 if (!error)
650 error = nfs_request_send(req, 0);
651 lck_mtx_lock(&req->r_mtx);
652 if (req->r_flags & R_RESENDQ)
653 req->r_flags &= ~R_RESENDQ;
654 if (error)
655 req->r_error = error;
656 wakeup(req);
657 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
658 lck_mtx_unlock(&req->r_mtx);
659 if (dofinish)
660 nfs_asyncio_finish(req);
661 lck_mtx_lock(&nmp->nm_lock);
662 error = 0;
663 continue;
664 }
665 NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
666 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
667 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */
668 if (!error)
669 error = nfs_sigintr(nmp, req, req->r_thread, 0);
670 if (!error) {
671 req->r_flags |= R_SENDING;
672 lck_mtx_unlock(&req->r_mtx);
673 error = nfs_send(req, 0);
674 lck_mtx_lock(&req->r_mtx);
675 if (!error) {
676 if (req->r_flags & R_RESENDQ)
677 req->r_flags &= ~R_RESENDQ;
678 wakeup(req);
679 lck_mtx_unlock(&req->r_mtx);
680 lck_mtx_lock(&nmp->nm_lock);
681 continue;
682 }
683 }
684 req->r_error = error;
685 if (req->r_flags & R_RESENDQ)
686 req->r_flags &= ~R_RESENDQ;
687 wakeup(req);
688 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
689 lck_mtx_unlock(&req->r_mtx);
690 if (dofinish)
691 nfs_asyncio_finish(req);
692 lck_mtx_lock(&nmp->nm_lock);
693 }
694 if (nmp->nm_deadto_start)
695 nfs_mount_check_dead_timeout(nmp);
696 if (force || (nmp->nm_state & NFSSTA_DEAD))
697 break;
698 if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & NFSSTA_RECOVER)) {
699 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
700 (nmp->nm_state & NFSSTA_RECOVER))
701 ts.tv_sec = 1;
702 else
703 ts.tv_sec = 30;
704 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
705 }
706 }
707
708 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */
709 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && (nmp->nm_flag & NFSMNT_CALLUMNT) &&
710 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
711 lck_mtx_unlock(&nmp->nm_lock);
712 nfs3_umount_rpc(nmp, vfs_context_kernel(),
713 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
714 lck_mtx_lock(&nmp->nm_lock);
715 }
716
717 if (nmp->nm_sockthd == thd)
718 nmp->nm_sockthd = NULL;
719 lck_mtx_unlock(&nmp->nm_lock);
720 wakeup(&nmp->nm_sockthd);
721 thread_terminate(thd);
722 }
723
724 /* start or wake a mount's socket thread */
725 void
726 nfs_mount_sock_thread_wake(struct nfsmount *nmp)
727 {
728 if (nmp->nm_sockthd)
729 wakeup(&nmp->nm_sockthd);
730 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
731 thread_deallocate(nmp->nm_sockthd);
732 }
733
734 /*
735 * Check if we should mark the mount dead because the
736 * unresponsive mount has reached the dead timeout.
737 * (must be called with nmp locked)
738 */
739 void
740 nfs_mount_check_dead_timeout(struct nfsmount *nmp)
741 {
742 struct timeval now;
743
744 if (!(nmp->nm_flag & NFSMNT_DEADTIMEOUT))
745 return;
746 if (nmp->nm_deadto_start == 0)
747 return;
748 if (nmp->nm_state & NFSSTA_DEAD)
749 return;
750 microuptime(&now);
751 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_deadtimeout)
752 return;
753 printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
754 nmp->nm_state |= NFSSTA_DEAD;
755 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
756 }
757
758 /*
759 * RPC record marker parsing state
760 */
761 struct nfs_rpc_record_state
762 {
763 uint16_t nrrs_lastfrag; /* last fragment of record */
764 uint16_t nrrs_markerleft; /* marker bytes remaining */
765 uint32_t nrrs_fragleft; /* fragment bytes remaining */
766 uint32_t nrrs_reclen; /* length of RPC record */
767 mbuf_t nrrs_m; /* mbufs for current record */
768 mbuf_t nrrs_mlast;
769 };
770 int nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int *, mbuf_t *);
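/*
 * Editorial note (not in the original source): nrrs_markerleft/nrrs_fragleft
 * above track the standard RPC-over-TCP record marking (RFC 1831): each
 * fragment is preceded by a 4-byte big-endian word whose high bit flags the
 * last fragment of the record and whose low 31 bits give the fragment
 * length. A minimal sketch of decoding one marker, mirroring the logic in
 * nfs_rpc_record_read() below:
 */
#if 0
#include <stdint.h>
#include <arpa/inet.h>

static void
decode_record_mark(uint32_t wire, int *lastfrag, uint32_t *fraglen)
{
	uint32_t marker = ntohl(wire);		/* marker arrives big-endian */
	*lastfrag = (marker & 0x80000000) != 0;	/* high bit: last fragment */
	*fraglen = marker & ~0x80000000;	/* low 31 bits: fragment length */
}
#endif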
771
772 /*
773 * NFS callback channel socket state
774 */
775 struct nfs_callback_socket
776 {
777 TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
778 socket_t ncbs_so; /* the socket */
779 struct sockaddr_in ncbs_sin; /* socket address */
780 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */
781 time_t ncbs_stamp; /* last accessed at */
782 uint32_t ncbs_flags; /* see below */
783 };
784 #define NCBSOCK_UPCALL 0x0001
785 #define NCBSOCK_UPCALLWANT 0x0002
786 #define NCBSOCK_DEAD 0x0004
787
788 /*
789 * NFS callback channel state
790 *
791 * One listening socket for accepting socket connections from servers and
792 * a list of connected sockets to handle callback requests on.
793 * Mounts registered with the callback channel are assigned IDs and
794 * put on a list so that the callback request handling code can match
795 * the requests up with mounts.
796 */
797 socket_t nfs4_cb_so = NULL;
798 in_port_t nfs4_cb_port = 0;
799 uint32_t nfs4_cb_id = 0;
800 uint32_t nfs4_cb_so_usecount = 0;
801 TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
802 TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;
803
804 int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
805
806 /*
807 * Set up the callback channel for the NFS mount.
808 *
809 * Initializes the callback channel socket state and
810 * assigns a callback ID to the mount.
811 */
812 void
813 nfs4_mount_callback_setup(struct nfsmount *nmp)
814 {
815 struct sockaddr_in sin;
816 socket_t so = NULL;
817 struct timeval timeo;
818 int error, on = 1;
819
820 lck_mtx_lock(nfs_global_mutex);
821 if (nfs4_cb_id == 0) {
822 TAILQ_INIT(&nfs4_cb_mounts);
823 TAILQ_INIT(&nfs4_cb_socks);
824 nfs4_cb_id++;
825 }
826 nmp->nm_cbid = nfs4_cb_id++;
827 if (nmp->nm_cbid == 0)
828 nmp->nm_cbid = nfs4_cb_id++;
829 nfs4_cb_so_usecount++;
830 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
831
832 if (nfs4_cb_so) {
833 lck_mtx_unlock(nfs_global_mutex);
834 return;
835 }
836
837 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
838 if (error) {
839 log(LOG_INFO, "nfs callback setup: error %d creating listening socket\n", error);
840 goto fail;
841 }
842 so = nfs4_cb_so;
843
844 sin.sin_len = sizeof(struct sockaddr_in);
845 sin.sin_family = AF_INET;
846 sin.sin_addr.s_addr = htonl(INADDR_ANY);
847 sin.sin_port = 0;
848 error = sock_bind(so, (struct sockaddr *)&sin);
849 if (error) {
850 log(LOG_INFO, "nfs callback setup: error %d binding listening socket\n", error);
851 goto fail;
852 }
853 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
854 if (error) {
855 log(LOG_INFO, "nfs callback setup: error %d getting listening socket port\n", error);
856 goto fail;
857 }
858 nfs4_cb_port = ntohs(sin.sin_port);
859
860 error = sock_listen(so, 32);
861 if (error) {
862 log(LOG_INFO, "nfs callback setup: error %d on listen\n", error);
863 goto fail;
864 }
865
866 /* receive timeout shouldn't matter. If a send times out, we'll want to drop the socket */
867 timeo.tv_usec = 0;
868 timeo.tv_sec = 60;
869 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
870 if (error)
871 log(LOG_INFO, "nfs callback setup: error %d setting socket rx timeout\n", error);
872 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
873 if (error)
874 log(LOG_INFO, "nfs callback setup: error %d setting socket tx timeout\n", error);
875 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
876 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
877 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
878 error = 0;
879
880 fail:
881 if (error) {
882 nfs4_cb_so = NULL;
883 lck_mtx_unlock(nfs_global_mutex);
884 if (so) {
885 sock_shutdown(so, SHUT_RDWR);
886 sock_close(so);
887 }
888 } else {
889 lck_mtx_unlock(nfs_global_mutex);
890 }
891 }
892
893 /*
894 * Shut down the callback channel for the NFS mount.
895 *
896 * Clears the mount's callback ID and releases the mount's
897 * reference on the callback socket. Dropping the last reference
898 * also shuts down the callback socket(s).
899 */
900 void
901 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
902 {
903 struct nfs_callback_socket *ncbsp;
904 socket_t so;
905 struct nfs4_cb_sock_list cb_socks;
906 struct timespec ts = {1,0};
907
908 lck_mtx_lock(nfs_global_mutex);
909 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
910 /* wait for any callbacks in progress to complete */
911 while (nmp->nm_cbrefs)
912 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
913 if (--nfs4_cb_so_usecount) {
914 lck_mtx_unlock(nfs_global_mutex);
915 return;
916 }
917 so = nfs4_cb_so;
918 nfs4_cb_so = NULL;
919 TAILQ_INIT(&cb_socks);
920 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
921 lck_mtx_unlock(nfs_global_mutex);
922 if (so) {
923 sock_shutdown(so, SHUT_RDWR);
924 sock_close(so);
925 }
926 while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
927 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
928 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
929 sock_close(ncbsp->ncbs_so);
930 FREE(ncbsp, M_TEMP);
931 }
932 }
933
934 /*
935 * Check periodically for stale/unused nfs callback sockets
936 */
937 #define NFS4_CB_TIMER_PERIOD 30
938 #define NFS4_CB_IDLE_MAX 300
939 void
940 nfs4_callback_timer(__unused void *param0, __unused void *param1)
941 {
942 struct nfs_callback_socket *ncbsp, *nextncbsp;
943 struct timeval now;
944
945 loop:
946 lck_mtx_lock(nfs_global_mutex);
947 if (TAILQ_EMPTY(&nfs4_cb_socks)) {
948 nfs4_callback_timer_on = 0;
949 lck_mtx_unlock(nfs_global_mutex);
950 return;
951 }
952 microuptime(&now);
953 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
954 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
955 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
956 continue;
957 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
958 lck_mtx_unlock(nfs_global_mutex);
959 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
960 sock_close(ncbsp->ncbs_so);
961 FREE(ncbsp, M_TEMP);
962 goto loop;
963 }
964 nfs4_callback_timer_on = 1;
965 nfs_interval_timer_start(nfs4_callback_timer_call,
966 NFS4_CB_TIMER_PERIOD * 1000);
967 lck_mtx_unlock(nfs_global_mutex);
968 }
969
970 /*
971 * Accept a new callback socket.
972 */
973 void
974 nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
975 {
976 socket_t newso = NULL;
977 struct nfs_callback_socket *ncbsp;
978 struct nfsmount *nmp;
979 struct timeval timeo, now;
980 struct sockaddr_in *saddr;
981 int error, on = 1;
982
983 if (so != nfs4_cb_so)
984 return;
985
986 /* allocate/initialize a new nfs_callback_socket */
987 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
988 if (!ncbsp) {
989 log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
990 return;
991 }
992 bzero(ncbsp, sizeof(*ncbsp));
993 ncbsp->ncbs_sin.sin_len = sizeof(struct sockaddr_in);
994 ncbsp->ncbs_rrs.nrrs_markerleft = sizeof(ncbsp->ncbs_rrs.nrrs_fragleft);
995
996 /* accept a new socket */
997 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_sin,
998 ncbsp->ncbs_sin.sin_len, MSG_DONTWAIT,
999 nfs4_cb_rcv, ncbsp, &newso);
1000 if (error) {
1001 log(LOG_INFO, "nfs callback accept: error %d accepting socket\n", error);
1002 FREE(ncbsp, M_TEMP);
1003 return;
1004 }
1005
1006 /* set up the new socket */
1007 /* receive timeout shouldn't matter. If a send times out, we'll want to drop the socket */
1008 timeo.tv_usec = 0;
1009 timeo.tv_sec = 60;
1010 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
1011 if (error)
1012 log(LOG_INFO, "nfs callback socket: error %d setting socket rx timeout\n", error);
1013 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1014 if (error)
1015 log(LOG_INFO, "nfs callback socket: error %d setting socket tx timeout\n", error);
1016 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
1017 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
1018 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
1019
1020 ncbsp->ncbs_so = newso;
1021 microuptime(&now);
1022 ncbsp->ncbs_stamp = now.tv_sec;
1023
1024 lck_mtx_lock(nfs_global_mutex);
1025
1026 /* add it to the list */
1027 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
1028
1029 /* verify it's from a host we have mounted */
1030 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
1031 /* check socket's source address matches this mount's server address */
1032 saddr = mbuf_data(nmp->nm_nam);
1033 if ((ncbsp->ncbs_sin.sin_len == saddr->sin_len) &&
1034 (ncbsp->ncbs_sin.sin_family == saddr->sin_family) &&
1035 (ncbsp->ncbs_sin.sin_addr.s_addr == saddr->sin_addr.s_addr))
1036 break;
1037 }
1038 if (!nmp) /* we don't want this socket, mark it dead */
1039 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
1040
1041 /* make sure the callback socket cleanup timer is running */
1042 /* (shorten the timer if we've got a socket we don't want) */
1043 if (!nfs4_callback_timer_on) {
1044 nfs4_callback_timer_on = 1;
1045 nfs_interval_timer_start(nfs4_callback_timer_call,
1046 !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
1047 } else if (!nmp && (nfs4_callback_timer_on < 2)) {
1048 nfs4_callback_timer_on = 2;
1049 thread_call_cancel(nfs4_callback_timer_call);
1050 nfs_interval_timer_start(nfs4_callback_timer_call, 500);
1051 }
1052
1053 lck_mtx_unlock(nfs_global_mutex);
1054 }
1055
1056 /*
1057 * Receive mbufs from callback sockets into RPC records and process each record.
1058 * Detect when the connection has been closed and needs to be shut down.
1059 */
1060 void
1061 nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
1062 {
1063 struct nfs_callback_socket *ncbsp = arg;
1064 struct timespec ts = {1,0};
1065 struct timeval now;
1066 mbuf_t m;
1067 int error = 0, recv = 1;
1068
1069 lck_mtx_lock(nfs_global_mutex);
1070 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
1071 /* wait if upcall is already in progress */
1072 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
1073 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
1074 }
1075 ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
1076 lck_mtx_unlock(nfs_global_mutex);
1077
1078 /* loop while we make error-free progress */
1079 while (!error && recv) {
1080 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, &recv, &m);
1081 if (m) /* handle the request */
1082 error = nfs4_cb_handler(ncbsp, m);
1083 }
1084
1085 /* note: no error and no data indicates server closed its end */
1086 if ((error != EWOULDBLOCK) && (error || !recv)) {
1087 /*
1088 * Socket is either being closed or should be.
1089 * We can't close the socket in the context of the upcall.
1090 * So we mark it as dead and leave it for the cleanup timer to reap.
1091 */
1092 ncbsp->ncbs_stamp = 0;
1093 ncbsp->ncbs_flags |= NCBSOCK_DEAD;
1094 } else {
1095 microuptime(&now);
1096 ncbsp->ncbs_stamp = now.tv_sec;
1097 }
1098
1099 lck_mtx_lock(nfs_global_mutex);
1100 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
1101 lck_mtx_unlock(nfs_global_mutex);
1102 wakeup(ncbsp);
1103 }
1104
1105 /*
1106 * Handle an NFS callback channel request.
1107 */
1108 int
1109 nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
1110 {
1111 socket_t so = ncbsp->ncbs_so;
1112 struct nfsm_chain nmreq, nmrep;
1113 mbuf_t mhead = NULL, mrest = NULL, m;
1114 struct sockaddr_in *saddr;
1115 struct msghdr msg;
1116 struct nfsmount *nmp;
1117 fhandle_t fh;
1118 nfsnode_t np;
1119 nfs_stateid stateid;
1120 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
1121 uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
1122 uint32_t auth_type, auth_len;
1123 uint32_t numres, *pnumres;
1124 int error = 0, replen, len;
1125 size_t sentlen = 0;
1126
1127 xid = numops = op = status = procnum = taglen = cbid = 0;
1128
1129 nfsm_chain_dissect_init(error, &nmreq, mreq);
1130 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID
1131 nfsm_chain_get_32(error, &nmreq, val); // RPC Call
1132 nfsm_assert(error, (val == RPC_CALL), EBADRPC);
1133 nfsm_chain_get_32(error, &nmreq, val); // RPC Version
1134 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
1135 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number
1136 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
1137 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number
1138 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
1139 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number
1140 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
1141
1142 /* Handle authentication */
1143 /* XXX just ignore auth for now - handling kerberos may be tricky */
1144 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor
1145 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length
1146 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
1147 if (!error && (auth_len > 0))
1148 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
1149 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
1150 nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length
1151 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
1152 if (!error && (auth_len > 0))
1153 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
1154 if (error) {
1155 status = error;
1156 error = 0;
1157 goto nfsmout;
1158 }
1159
1160 switch (procnum) {
1161 case NFSPROC4_CB_NULL:
1162 status = NFSERR_RETVOID;
1163 break;
1164 case NFSPROC4_CB_COMPOUND:
1165 /* tag, minorversion, cb ident, numops, op array */
1166 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */
1167 nfsm_assert(error, (taglen <= NFS4_OPAQUE_LIMIT), EBADRPC);
1168
1169 /* start building the body of the response */
1170 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
1171 nfsm_chain_init(&nmrep, mrest);
1172
1173 /* copy tag from request to response */
1174 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */
1175 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
1176 nfsm_chain_get_32(error, &nmreq, val);
1177 nfsm_chain_add_32(error, &nmrep, val);
1178 }
1179
1180 /* insert number of results placeholder */
1181 numres = 0;
1182 nfsm_chain_add_32(error, &nmrep, numres);
1183 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
1184
1185 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */
1186 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
1187 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */
1188 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */
1189 if (error) {
1190 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
1191 status = error;
1192 else if ((error == ENOBUFS) || (error == ENOMEM))
1193 status = NFSERR_RESOURCE;
1194 else
1195 status = NFSERR_SERVERFAULT;
1196 error = 0;
1197 nfsm_chain_null(&nmrep);
1198 goto nfsmout;
1199 }
1200 /* match the callback ID to a registered mount */
1201 lck_mtx_lock(nfs_global_mutex);
1202 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
1203 if (nmp->nm_cbid != cbid)
1204 continue;
1205 /* verify socket's source address matches this mount's server address */
1206 saddr = mbuf_data(nmp->nm_nam);
1207 if ((ncbsp->ncbs_sin.sin_len != saddr->sin_len) ||
1208 (ncbsp->ncbs_sin.sin_family != saddr->sin_family) ||
1209 (ncbsp->ncbs_sin.sin_addr.s_addr != saddr->sin_addr.s_addr))
1210 continue;
1211 break;
1212 }
1213 /* mark the NFS mount as busy */
1214 if (nmp)
1215 nmp->nm_cbrefs++;
1216 lck_mtx_unlock(nfs_global_mutex);
1217 if (!nmp) {
1218 /* if no mount match, just drop socket. */
1219 error = EPERM;
1220 nfsm_chain_null(&nmrep);
1221 goto out;
1222 }
1223
1224 /* process ops, adding results to mrest */
1225 while (numops > 0) {
1226 numops--;
1227 nfsm_chain_get_32(error, &nmreq, op);
1228 if (error)
1229 break;
1230 switch (op) {
1231 case NFS_OP_CB_GETATTR:
1232 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
1233 np = NULL;
1234 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
1235 bmlen = NFS_ATTR_BITMAP_LEN;
1236 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
1237 if (error) {
1238 status = error;
1239 error = 0;
1240 numops = 0; /* don't process any more ops */
1241 } else {
1242 /* find the node for the file handle */
1243 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
1244 if (error || !np) {
1245 status = NFSERR_BADHANDLE;
1246 error = 0;
1247 np = NULL;
1248 numops = 0; /* don't process any more ops */
1249 }
1250 }
1251 nfsm_chain_add_32(error, &nmrep, op);
1252 nfsm_chain_add_32(error, &nmrep, status);
1253 if (!error && (status == EBADRPC))
1254 error = status;
1255 if (np) {
1256 /* only allow returning size, change, and mtime attrs */
1257 NFS_CLEAR_ATTRIBUTES(&rbitmap);
1258 attrbytes = 0;
1259 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
1260 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
1261 attrbytes += 2 * NFSX_UNSIGNED;
1262 }
1263 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
1264 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
1265 attrbytes += 2 * NFSX_UNSIGNED;
1266 }
1267 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
1268 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
1269 attrbytes += 3 * NFSX_UNSIGNED;
1270 }
1271 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
1272 nfsm_chain_add_32(error, &nmrep, attrbytes);
1273 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
1274 nfsm_chain_add_64(error, &nmrep,
1275 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
1276 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
1277 nfsm_chain_add_64(error, &nmrep, np->n_size);
1278 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
1279 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
1280 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
1281 }
1282 nfs_node_unlock(np);
1283 vnode_put(NFSTOV(np));
1284 np = NULL;
1285 }
1286 /*
1287 * If we hit an error building the reply, we can't easily back up.
1288 * So we'll just update the status and hope the server ignores the
1289 * extra garbage.
1290 */
1291 break;
1292 case NFS_OP_CB_RECALL:
1293 // (STATEID, TRUNCATE, FH) -> (STATUS)
1294 np = NULL;
1295 nfsm_chain_get_stateid(error, &nmreq, &stateid);
1296 nfsm_chain_get_32(error, &nmreq, truncate);
1297 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
1298 if (error) {
1299 status = error;
1300 error = 0;
1301 numops = 0; /* don't process any more ops */
1302 } else {
1303 /* find the node for the file handle */
1304 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
1305 if (error || !np) {
1306 status = NFSERR_BADHANDLE;
1307 error = 0;
1308 np = NULL;
1309 numops = 0; /* don't process any more ops */
1310 } else if (!(np->n_openflags & N_DELEG_MASK) ||
1311 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
1312 /* delegation stateid state doesn't match */
1313 status = NFSERR_BAD_STATEID;
1314 numops = 0; /* don't process any more ops */
1315 }
1316 if (!status) {
1317 /* add node to recall queue, and wake socket thread */
1318 lck_mtx_lock(&nmp->nm_lock);
1319 if (np->n_dlink.tqe_next == NFSNOLIST)
1320 TAILQ_INSERT_TAIL(&nmp->nm_recallq, np, n_dlink);
1321 nfs_mount_sock_thread_wake(nmp);
1322 lck_mtx_unlock(&nmp->nm_lock);
1323 }
1324 if (np) {
1325 nfs_node_unlock(np);
1326 vnode_put(NFSTOV(np));
1327 }
1328 }
1329 nfsm_chain_add_32(error, &nmrep, op);
1330 nfsm_chain_add_32(error, &nmrep, status);
1331 if (!error && (status == EBADRPC))
1332 error = status;
1333 break;
1334 case NFS_OP_CB_ILLEGAL:
1335 default:
1336 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
1337 status = NFSERR_OP_ILLEGAL;
1338 nfsm_chain_add_32(error, &nmrep, status);
1339 numops = 0; /* don't process any more ops */
1340 break;
1341 }
1342 numres++;
1343 }
1344
1345 if (!status && error) {
1346 if (error == EBADRPC)
1347 status = error;
1348 else if ((error == ENOBUFS) || (error == ENOMEM))
1349 status = NFSERR_RESOURCE;
1350 else
1351 status = NFSERR_SERVERFAULT;
1352 error = 0;
1353 }
1354
1355 /* Now, set the numres field */
1356 *pnumres = txdr_unsigned(numres);
1357 nfsm_chain_build_done(error, &nmrep);
1358 nfsm_chain_null(&nmrep);
1359
1360 /* drop the callback reference on the mount */
1361 lck_mtx_lock(nfs_global_mutex);
1362 nmp->nm_cbrefs--;
1363 if (!nmp->nm_cbid)
1364 wakeup(&nmp->nm_cbrefs);
1365 lck_mtx_unlock(nfs_global_mutex);
1366 break;
1367 }
1368
1369 nfsmout:
1370 if (status == EBADRPC)
1371 OSAddAtomic(1, &nfsstats.rpcinvalid);
1372
1373 /* build reply header */
1374 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
1375 nfsm_chain_init(&nmrep, mhead);
1376 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
1377 nfsm_chain_add_32(error, &nmrep, xid);
1378 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
1379 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
1380 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
1381 if (status & NFSERR_AUTHERR) {
1382 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
1383 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
1384 } else {
1385 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
1386 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
1387 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
1388 }
1389 } else {
1390 /* reply status */
1391 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
1392 /* XXX RPCAUTH_NULL verifier */
1393 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
1394 nfsm_chain_add_32(error, &nmrep, 0);
1395 /* accepted status */
1396 switch (status) {
1397 case EPROGUNAVAIL:
1398 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
1399 break;
1400 case EPROGMISMATCH:
1401 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
1402 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
1403 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
1404 break;
1405 case EPROCUNAVAIL:
1406 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
1407 break;
1408 case EBADRPC:
1409 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
1410 break;
1411 default:
1412 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
1413 if (status != NFSERR_RETVOID)
1414 nfsm_chain_add_32(error, &nmrep, status);
1415 break;
1416 }
1417 }
1418 nfsm_chain_build_done(error, &nmrep);
1419 if (error) {
1420 nfsm_chain_null(&nmrep);
1421 goto out;
1422 }
1423 error = mbuf_setnext(nmrep.nmc_mcur, mrest);
1424 if (error) {
1425 printf("nfs cb: mbuf_setnext failed %d\n", error);
1426 goto out;
1427 }
1428 mrest = NULL;
1429 /* Calculate the size of the reply */
1430 replen = 0;
1431 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
1432 replen += mbuf_len(m);
1433 mbuf_pkthdr_setlen(mhead, replen);
1434 error = mbuf_pkthdr_setrcvif(mhead, NULL);
1435 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
1436 nfsm_chain_null(&nmrep);
1437
1438 /* send the reply */
1439 bzero(&msg, sizeof(msg));
1440 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
1441 mhead = NULL;
1442 if (!error && ((int)sentlen != replen))
1443 error = EWOULDBLOCK;
1444 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
1445 error = ETIMEDOUT;
1446 out:
1447 if (error)
1448 nfsm_chain_cleanup(&nmrep);
1449 if (mhead)
1450 mbuf_freem(mhead);
1451 if (mrest)
1452 mbuf_freem(mrest);
1453 if (mreq)
1454 mbuf_freem(mreq);
1455 return (error);
1456 }
1457
1458
1459 /*
1460 * Read the next (marked) RPC record from the socket.
1461 *
1462 * *recvp returns whether any data was received.
1463 * *mp returns the next complete RPC record, if any.
1464 */
1465 int
1466 nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp, mbuf_t *mp)
1467 {
1468 struct iovec aio;
1469 struct msghdr msg;
1470 size_t rcvlen;
1471 int error = 0;
1472 mbuf_t m;
1473
1474 *recvp = 0;
1475 *mp = NULL;
1476
1477 /* read the TCP RPC record marker */
1478 while (!error && nrrsp->nrrs_markerleft) {
1479 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
1480 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
1481 aio.iov_len = nrrsp->nrrs_markerleft;
1482 bzero(&msg, sizeof(msg));
1483 msg.msg_iov = &aio;
1484 msg.msg_iovlen = 1;
1485 error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
1486 if (error || !rcvlen)
1487 break;
1488 *recvp = 1;
1489 nrrsp->nrrs_markerleft -= rcvlen;
1490 if (nrrsp->nrrs_markerleft)
1491 continue;
1492 /* record marker complete */
1493 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
1494 if (nrrsp->nrrs_fragleft & 0x80000000) {
1495 nrrsp->nrrs_lastfrag = 1;
1496 nrrsp->nrrs_fragleft &= ~0x80000000;
1497 }
1498 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
1499 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
1500 /*
1501 * This is SERIOUS! We are out of sync with the sender
1502 * and forcing a disconnect/reconnect is all I can do.
1503 */
1504 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
1505 error = EFBIG;
1506 }
1507 }
1508
1509 /* read the TCP RPC record fragment */
1510 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
1511 m = NULL;
1512 rcvlen = nrrsp->nrrs_fragleft;
1513 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
1514 if (error || !rcvlen || !m)
1515 break;
1516 *recvp = 1;
1517 /* append mbufs to list */
1518 nrrsp->nrrs_fragleft -= rcvlen;
1519 if (!nrrsp->nrrs_m) {
1520 nrrsp->nrrs_m = m;
1521 } else {
1522 error = mbuf_setnext(nrrsp->nrrs_mlast, m);
1523 if (error) {
1524 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
1525 mbuf_freem(m);
1526 break;
1527 }
1528 }
1529 while (mbuf_next(m))
1530 m = mbuf_next(m);
1531 nrrsp->nrrs_mlast = m;
1532 }
1533
1534 /* done reading fragment? */
1535 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
1536 /* reset socket fragment parsing state */
1537 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
1538 if (nrrsp->nrrs_lastfrag) {
1539 /* RPC record complete */
1540 *mp = nrrsp->nrrs_m;
1541 /* reset socket record parsing state */
1542 nrrsp->nrrs_reclen = 0;
1543 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
1544 nrrsp->nrrs_lastfrag = 0;
1545 }
1546 }
1547
1548 return (error);
1549 }
1550
1551
1552
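/*
 * Editorial note (not in the original source): a hedged sketch of the two
 * ways callers are expected to invoke nfs_send() below, based on the "wait"
 * handling in its body. The surrounding context (req, error) is assumed:
 */
#if 0
	/* synchronous path: block for reconnect/cwnd space as needed */
	error = nfs_send(req, 1);

	/*
	 * socket-thread resend path: never block; a 0 return with
	 * R_MUSTRESEND left set means "try again later"
	 */
	error = nfs_send(req, 0);
#endif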
1553 /*
1554 * The NFS client send routine.
1555 *
1556 * Send the given NFS request out the mount's socket.
1557 * Holds nfs_sndlock() for the duration of this call.
1558 *
1559 * - check for request termination (sigintr)
1560 * - wait for reconnect, if necessary
1561 * - UDP: check the congestion window
1562 * - make a copy of the request to send
1563 * - UDP: update the congestion window
1564 * - send the request
1565 *
1566 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
1567 * rexmit count is also updated if this isn't the first send.
1568 *
1569 * If the send is not successful, make sure R_MUSTRESEND is set.
1570 * If this wasn't the first transmit, set R_RESENDERR.
1571 * Also, undo any UDP congestion window changes made.
1572 *
1573 * If the error appears to indicate that the socket should
1574 * be reconnected, mark the socket for reconnection.
1575 *
1576 * Only return errors when the request should be aborted.
1577 */
1578 int
1579 nfs_send(struct nfsreq *req, int wait)
1580 {
1581 struct nfsmount *nmp;
1582 socket_t so;
1583 int error, error2, sotype, rexmit, slpflag = 0, needrecon;
1584 struct msghdr msg;
1585 struct sockaddr *sendnam;
1586 mbuf_t mreqcopy;
1587 size_t sentlen = 0;
1588 struct timespec ts = { 2, 0 };
1589
1590 again:
1591 error = nfs_sndlock(req);
1592 if (error) {
1593 lck_mtx_lock(&req->r_mtx);
1594 req->r_error = error;
1595 req->r_flags &= ~R_SENDING;
1596 lck_mtx_unlock(&req->r_mtx);
1597 return (error);
1598 }
1599
1600 error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
1601 if (error) {
1602 nfs_sndunlock(req);
1603 lck_mtx_lock(&req->r_mtx);
1604 req->r_error = error;
1605 req->r_flags &= ~R_SENDING;
1606 lck_mtx_unlock(&req->r_mtx);
1607 return (error);
1608 }
1609 nmp = req->r_nmp;
1610 sotype = nmp->nm_sotype;
1611
1612 /*
1613 * If it's a setup RPC but we're not in SETUP... must need reconnect.
1614 * If it's a recovery RPC but the socket's not ready... must need reconnect.
1615 */
1616 if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
1617 ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
1618 error = ETIMEDOUT;
1619 nfs_sndunlock(req);
1620 lck_mtx_lock(&req->r_mtx);
1621 req->r_error = error;
1622 req->r_flags &= ~R_SENDING;
1623 lck_mtx_unlock(&req->r_mtx);
1624 return (error);
1625 }
1626
1627 /* If the socket needs reconnection, do that now. */
1628 /* wait until socket is ready - unless this request is part of setup */
1629 lck_mtx_lock(&nmp->nm_lock);
1630 if (!(nmp->nm_sockflags & NMSOCK_READY) &&
1631 !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
1632 if (nmp->nm_flag & NFSMNT_INT)
1633 slpflag |= PCATCH;
1634 lck_mtx_unlock(&nmp->nm_lock);
1635 nfs_sndunlock(req);
1636 if (!wait) {
1637 lck_mtx_lock(&req->r_mtx);
1638 req->r_flags &= ~R_SENDING;
1639 req->r_flags |= R_MUSTRESEND;
1640 req->r_rtt = 0;
1641 lck_mtx_unlock(&req->r_mtx);
1642 return (0);
1643 }
1644 NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
1645 lck_mtx_lock(&req->r_mtx);
1646 req->r_flags &= ~R_MUSTRESEND;
1647 req->r_rtt = 0;
1648 lck_mtx_unlock(&req->r_mtx);
1649 lck_mtx_lock(&nmp->nm_lock);
1650 while (!(nmp->nm_sockflags & NMSOCK_READY)) {
1651 /* don't bother waiting if the socket thread won't be reconnecting it */
1652 if (nmp->nm_state & NFSSTA_FORCE) {
1653 error = EIO;
1654 break;
1655 }
1656 if ((nmp->nm_flag & NFSMNT_SOFT) && (nmp->nm_reconnect_start > 0)) {
1657 struct timeval now;
1658 microuptime(&now);
1659 if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
1660 /* soft mount in reconnect for a while... terminate ASAP */
1661 OSAddAtomic(1, &nfsstats.rpctimeouts);
1662 req->r_flags |= R_SOFTTERM;
1663 req->r_error = error = ETIMEDOUT;
1664 break;
1665 }
1666 }
1667 /* make sure socket thread is running, then wait */
1668 nfs_mount_sock_thread_wake(nmp);
1669 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
1670 break;
1671 msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
1672 slpflag = 0;
1673 }
1674 lck_mtx_unlock(&nmp->nm_lock);
1675 if (error) {
1676 lck_mtx_lock(&req->r_mtx);
1677 req->r_error = error;
1678 req->r_flags &= ~R_SENDING;
1679 lck_mtx_unlock(&req->r_mtx);
1680 return (error);
1681 }
1682 goto again;
1683 }
1684 so = nmp->nm_so;
1685 lck_mtx_unlock(&nmp->nm_lock);
1686 if (!so) {
1687 nfs_sndunlock(req);
1688 lck_mtx_lock(&req->r_mtx);
1689 req->r_flags &= ~R_SENDING;
1690 req->r_flags |= R_MUSTRESEND;
1691 req->r_rtt = 0;
1692 lck_mtx_unlock(&req->r_mtx);
1693 return (0);
1694 }
1695
1696 lck_mtx_lock(&req->r_mtx);
1697 rexmit = (req->r_flags & R_SENT);
1698
1699 if (sotype == SOCK_DGRAM) {
1700 lck_mtx_lock(&nmp->nm_lock);
1701 if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
1702 /* if we can't send this out yet, wait on the cwnd queue */
1703 slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
1704 lck_mtx_unlock(&nmp->nm_lock);
1705 nfs_sndunlock(req);
1706 req->r_flags &= ~R_SENDING;
1707 req->r_flags |= R_MUSTRESEND;
1708 lck_mtx_unlock(&req->r_mtx);
1709 if (!wait) {
1710 req->r_rtt = 0;
1711 return (0);
1712 }
1713 lck_mtx_lock(&nmp->nm_lock);
1714 while (nmp->nm_sent >= nmp->nm_cwnd) {
1715 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
1716 break;
1717 TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
1718 msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
1719 slpflag = 0;
1720 if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
1721 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
1722 req->r_cchain.tqe_next = NFSREQNOLIST;
1723 }
1724 }
1725 lck_mtx_unlock(&nmp->nm_lock);
1726 goto again;
1727 }
1728 /*
1729 * We update these *before* the send to avoid racing
1730 * against others who may be looking to send requests.
1731 */
1732 if (!rexmit) {
1733 /* first transmit */
1734 req->r_flags |= R_CWND;
1735 nmp->nm_sent += NFS_CWNDSCALE;
1736 } else {
1737 /*
1738 * When retransmitting, turn timing off
1739 * and divide congestion window by 2.
1740 */
1741 req->r_flags &= ~R_TIMING;
1742 nmp->nm_cwnd >>= 1;
1743 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1744 nmp->nm_cwnd = NFS_CWNDSCALE;
1745 }
1746 lck_mtx_unlock(&nmp->nm_lock);
1747 }
1748
1749 req->r_flags &= ~R_MUSTRESEND;
1750 lck_mtx_unlock(&req->r_mtx);
1751
1752 error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
1753 wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
1754 if (error) {
1755 if (wait)
1756 log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
1757 nfs_sndunlock(req);
1758 lck_mtx_lock(&req->r_mtx);
1759 req->r_flags &= ~R_SENDING;
1760 req->r_flags |= R_MUSTRESEND;
1761 req->r_rtt = 0;
1762 lck_mtx_unlock(&req->r_mtx);
1763 return (0);
1764 }
1765
1766 bzero(&msg, sizeof(msg));
1767 if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
1768 if ((sendnam = mbuf_data(nmp->nm_nam))) {
1769 msg.msg_name = (caddr_t)sendnam;
1770 msg.msg_namelen = sendnam->sa_len;
1771 }
1772 }
1773 error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
1774 #ifdef NFS_SOCKET_DEBUGGING
1775 if (error || (sentlen != req->r_mreqlen))
1776 NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
1777 req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
1778 #endif
1779 if (!error && (sentlen != req->r_mreqlen))
1780 error = EWOULDBLOCK;
1781 needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));
1782
1783 lck_mtx_lock(&req->r_mtx);
1784 req->r_flags &= ~R_SENDING;
1785 req->r_rtt = 0;
1786 if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
1787 req->r_rexmit = NFS_MAXREXMIT;
1788
1789 if (!error) {
1790 /* SUCCESS */
1791 req->r_flags &= ~R_RESENDERR;
1792 if (rexmit)
1793 OSAddAtomic(1, &nfsstats.rpcretries);
1794 req->r_flags |= R_SENT;
1795 if (req->r_flags & R_WAITSENT) {
1796 req->r_flags &= ~R_WAITSENT;
1797 wakeup(req);
1798 }
1799 nfs_sndunlock(req);
1800 lck_mtx_unlock(&req->r_mtx);
1801 return (0);
1802 }
1803
1804 /* send failed */
1805 req->r_flags |= R_MUSTRESEND;
1806 if (rexmit)
1807 req->r_flags |= R_RESENDERR;
1808 if ((error == EINTR) || (error == ERESTART))
1809 req->r_error = error;
1810 lck_mtx_unlock(&req->r_mtx);
1811
1812 if (sotype == SOCK_DGRAM) {
1813 /*
1814 * Note: even though a first send may fail, we consider
1815 * the request sent for congestion window purposes.
1816 * So we don't need to undo any of the changes made above.
1817 */
1818 /*
1819 * Should socket errors be ignored for connectionless sockets?
1820 * For now, just clear and ignore them all.
1821 */
1822 if ((error != EINTR) && (error != ERESTART) &&
1823 (error != EWOULDBLOCK) && (error != EIO)) {
1824 int clearerror = 0, optlen = sizeof(clearerror);
1825 sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
1826 #ifdef NFS_SOCKET_DEBUGGING
1827 if (clearerror)
1828 NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
1829 error, clearerror));
1830 #endif
1831 }
1832 }
1833
1834 /* check if it appears we should reconnect the socket */
1835 switch (error) {
1836 case EWOULDBLOCK:
1837 /* if send timed out, reconnect if on TCP */
1838 if (sotype != SOCK_STREAM)
1839 break;
1840 case EPIPE:
1841 case EADDRNOTAVAIL:
1842 case ENETDOWN:
1843 case ENETUNREACH:
1844 case ENETRESET:
1845 case ECONNABORTED:
1846 case ECONNRESET:
1847 case ENOTCONN:
1848 case ESHUTDOWN:
1849 case ECONNREFUSED:
1850 case EHOSTDOWN:
1851 case EHOSTUNREACH:
1852 needrecon = 1;
1853 break;
1854 }
1855 if (needrecon) { /* mark socket as needing reconnect */
1856 NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
1857 nfs_need_reconnect(nmp);
1858 }
1859
1860 nfs_sndunlock(req);
1861
1862 /*
1863 * Don't log some errors:
1864 * EPIPE errors may be common with servers that drop idle connections.
1865 * EADDRNOTAVAIL may occur on network transitions.
1866 * ENOTCONN may occur under some network conditions.
1867 */
1868 if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
1869 error = 0;
1870 if (error && (error != EINTR) && (error != ERESTART))
1871 log(LOG_INFO, "nfs send error %d for server %s\n", error,
1872 !req->r_nmp ? "<unmounted>" :
1873 vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
1874
1875 /* prefer request termination error over other errors */
1876 error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
1877 if (error2)
1878 error = error2;
1879
1880 /* only allow the following errors to be returned */
1881 if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
1882 (error != ENXIO) && (error != ETIMEDOUT))
1883 error = 0;
1884 return (error);
1885 }
1886
1887 /*
1888 * NFS client socket upcalls
1889 *
1890 * Pull RPC replies out of an NFS mount's socket and match them
1891 * up with the pending request.
1892 *
1893 * The datagram code is simple because we always get whole
1894 * messages out of the socket.
1895 *
1896 * The stream code is more involved because we have to parse
1897 * the RPC records out of the stream.
1898 */
1899
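/*
 * Illustrative sketch (not part of the original source): the stream
 * code below depends on RPC record marking, in which each fragment on
 * the wire is preceded by a 4-byte marker whose high bit flags the
 * last fragment of a record and whose low 31 bits give the fragment
 * length - exactly what nfs_tcp_rcv() parses into nm_fragleft and
 * NMSOCK_LASTFRAG.  A minimal decoder for that marker might look like:
 */
#if 0	/* example only */
static void
rpc_decode_marker(uint32_t wiremarker, int *lastfrag, uint32_t *fraglen)
{
	uint32_t marker = ntohl(wiremarker);	  /* marker arrives in network byte order */

	*lastfrag = ((marker & 0x80000000) != 0); /* high bit: last fragment of record */
	*fraglen = (marker & ~0x80000000);	  /* low 31 bits: fragment length */
}
#endif
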
1900 /* NFS client UDP socket upcall */
1901 void
1902 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
1903 {
1904 struct nfsmount *nmp = arg;
1905 size_t rcvlen;
1906 mbuf_t m;
1907 int error = 0;
1908
1909 if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
1910 wakeup(&nmp->nm_so);
1911 return;
1912 }
1913
1914 /* make sure we're on the current socket */
1915 if (nmp->nm_so != so)
1916 return;
1917
1918 do {
1919 m = NULL;
1920 rcvlen = 1000000;
1921 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
1922 if (m)
1923 nfs_request_match_reply(nmp, m);
1924 } while (m && !error);
1925
1926 if (error && (error != EWOULDBLOCK)) {
1927 /* problems with the socket... mark for reconnection */
1928 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
1929 nfs_need_reconnect(nmp);
1930 }
1931 }
1932
1933 /* NFS client TCP socket upcall */
1934 void
1935 nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
1936 {
1937 struct nfsmount *nmp = arg;
1938 struct iovec aio;
1939 struct msghdr msg;
1940 size_t rcvlen;
1941 mbuf_t m;
1942 int error = 0;
1943 int recv;
1944
1945 if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
1946 wakeup(&nmp->nm_so);
1947 return;
1948 }
1949
1950 /* make sure we're on the current socket */
1951 if (nmp->nm_so != so)
1952 return;
1953
1954 lck_mtx_lock(&nmp->nm_lock);
1955 if (nmp->nm_sockflags & NMSOCK_UPCALL) {
1956 /* upcall is already receiving data - just return */
1957 lck_mtx_unlock(&nmp->nm_lock);
1958 return;
1959 }
1960 nmp->nm_sockflags |= NMSOCK_UPCALL;
1961
1962 nextfrag:
1963 recv = 0;
1964
1965 /* read the TCP RPC record marker */
1966 while (!error && nmp->nm_markerleft) {
1967 aio.iov_base = ((char*)&nmp->nm_fragleft +
1968 sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
1969 aio.iov_len = nmp->nm_markerleft;
1970 bzero(&msg, sizeof(msg));
1971 msg.msg_iov = &aio;
1972 msg.msg_iovlen = 1;
1973 lck_mtx_unlock(&nmp->nm_lock);
1974 error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
1975 lck_mtx_lock(&nmp->nm_lock);
1976 if (error || !rcvlen)
1977 break;
1978 recv = 1;
1979 nmp->nm_markerleft -= rcvlen;
1980 if (nmp->nm_markerleft)
1981 continue;
1982 /* record marker complete */
1983 nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
1984 if (nmp->nm_fragleft & 0x80000000) {
1985 nmp->nm_sockflags |= NMSOCK_LASTFRAG;
1986 nmp->nm_fragleft &= ~0x80000000;
1987 }
1988 nmp->nm_reclen += nmp->nm_fragleft;
1989 if (nmp->nm_reclen > NFS_MAXPACKET) {
1990 /*
1991 * This is SERIOUS! We are out of sync with the sender
1992 * and forcing a disconnect/reconnect is all we can do.
1993 */
1994 log(LOG_ERR, "%s (%d) from nfs server %s\n",
1995 "impossible RPC record length", nmp->nm_reclen,
1996 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1997 error = EFBIG;
1998 }
1999 }
2000
2001 /* read the TCP RPC record fragment */
2002 while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
2003 m = NULL;
2004 rcvlen = nmp->nm_fragleft;
2005 lck_mtx_unlock(&nmp->nm_lock);
2006 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
2007 lck_mtx_lock(&nmp->nm_lock);
2008 if (error || !rcvlen || !m)
2009 break;
2010 recv = 1;
2011 /* append mbufs to list */
2012 nmp->nm_fragleft -= rcvlen;
2013 if (!nmp->nm_m) {
2014 nmp->nm_m = m;
2015 } else {
2016 error = mbuf_setnext(nmp->nm_mlast, m);
2017 if (error) {
2018 printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
2019 mbuf_freem(m);
2020 break;
2021 }
2022 }
2023 while (mbuf_next(m))
2024 m = mbuf_next(m);
2025 nmp->nm_mlast = m;
2026 }
2027
2028 /* done reading fragment? */
2029 m = NULL;
2030 if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
2031 /* reset socket fragment parsing state */
2032 nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
2033 if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
2034 /* RPC record complete */
2035 m = nmp->nm_m;
2036 /* reset socket record parsing state */
2037 nmp->nm_reclen = 0;
2038 nmp->nm_m = nmp->nm_mlast = NULL;
2039 nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
2040 }
2041 }
2042
2043 if (m) { /* match completed response with request */
2044 lck_mtx_unlock(&nmp->nm_lock);
2045 nfs_request_match_reply(nmp, m);
2046 lck_mtx_lock(&nmp->nm_lock);
2047 }
2048
2049 /* loop if we've been making error-free progress */
2050 if (!error && recv)
2051 goto nextfrag;
2052
2053 nmp->nm_sockflags &= ~NMSOCK_UPCALL;
2054 lck_mtx_unlock(&nmp->nm_lock);
2055 #ifdef NFS_SOCKET_DEBUGGING
2056 if (!recv && (error != EWOULDBLOCK))
2057 NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
2058 #endif
2059 /* note: no error and no data indicates server closed its end */
2060 if ((error != EWOULDBLOCK) && (error || !recv)) {
2061 /* problems with the socket... mark for reconnection */
2062 NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
2063 nfs_need_reconnect(nmp);
2064 }
2065 }
2066
2067 /*
2068 * "poke" a socket to try to provoke any pending errors
2069 */
2070 void
2071 nfs_sock_poke(struct nfsmount *nmp)
2072 {
2073 struct iovec aio;
2074 struct msghdr msg;
2075 size_t len;
2076 int error = 0;
2077 int dummy;
2078
2079 lck_mtx_lock(&nmp->nm_lock);
2080 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
2081 lck_mtx_unlock(&nmp->nm_lock);
2082 return;
2083 }
2084 lck_mtx_unlock(&nmp->nm_lock);
2085 aio.iov_base = &dummy;
2086 aio.iov_len = 0;
2087 len = 0;
2088 bzero(&msg, sizeof(msg));
2089 msg.msg_iov = &aio;
2090 msg.msg_iovlen = 1;
2091 error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
2092 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
2093 }
2094
2095 /*
2096 * Match an RPC reply with the corresponding request
2097 */
2098 void
2099 nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
2100 {
2101 struct nfsreq *req;
2102 struct nfsm_chain nmrep;
2103 u_int32_t reply = 0, rxid = 0;
2104 int error = 0, asyncioq, t1;
2105
2106 /* Get the xid and check that it is an rpc reply */
2107 nfsm_chain_dissect_init(error, &nmrep, mrep);
2108 nfsm_chain_get_32(error, &nmrep, rxid);
2109 nfsm_chain_get_32(error, &nmrep, reply);
2110 if (error || (reply != RPC_REPLY)) {
2111 OSAddAtomic(1, &nfsstats.rpcinvalid);
2112 mbuf_freem(mrep);
2113 return;
2114 }
2115
2116 /*
2117 * Loop through the request list to match up the reply
2118 * If no match is found, just drop it.
2119 */
2120 lck_mtx_lock(nfs_request_mutex);
2121 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
2122 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
2123 continue;
2124 /* looks like we have it, grab lock and double check */
2125 lck_mtx_lock(&req->r_mtx);
2126 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
2127 lck_mtx_unlock(&req->r_mtx);
2128 continue;
2129 }
2130 /* Found it.. */
2131 req->r_nmrep = nmrep;
2132 lck_mtx_lock(&nmp->nm_lock);
2133 if (nmp->nm_sotype == SOCK_DGRAM) {
2134 /*
2135 * Update congestion window.
2136 * Do the additive increase of one rpc/rtt.
2137 */
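/*
 * Illustrative note (not from the original source): the increment
 * below is NFS_CWNDSCALE*NFS_CWNDSCALE divided by nm_cwnd, with
 * nm_cwnd/2 added first so the integer division rounds to nearest.
 * Summed over one round trip's worth of replies this grows nm_cwnd
 * by roughly one NFS_CWNDSCALE unit - i.e. room for one more
 * outstanding request.
 */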
2138 FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2139 if (nmp->nm_cwnd <= nmp->nm_sent) {
2140 nmp->nm_cwnd +=
2141 ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
2142 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
2143 if (nmp->nm_cwnd > NFS_MAXCWND)
2144 nmp->nm_cwnd = NFS_MAXCWND;
2145 }
2146 if (req->r_flags & R_CWND) {
2147 nmp->nm_sent -= NFS_CWNDSCALE;
2148 req->r_flags &= ~R_CWND;
2149 }
2150 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2151 /* congestion window is open, poke the cwnd queue */
2152 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2153 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2154 req2->r_cchain.tqe_next = NFSREQNOLIST;
2155 wakeup(req2);
2156 }
2157 }
2158 /*
2159 * Update rtt using a gain of 0.125 on the mean
2160 * and a gain of 0.25 on the deviation.
2161 */
2162 if (req->r_flags & R_TIMING) {
2163 /*
2164 * Since the timer resolution of
2165 * NFS_HZ is so coarse, it can often
2166 * result in r_rtt == 0. Since
2167 * r_rtt == N means that the actual
2168 * rtt is between N+dt and N+2-dt ticks,
2169 * add 1.
2170 */
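/*
 * Illustrative note (not from the original source): assuming the
 * standard BSD scaling for these macros - NFS_SRTT() holding the
 * smoothed rtt scaled by 8 and NFS_SDRTT() holding the deviation
 * scaled by 4 - the steps below implement, in integer arithmetic,
 * srtt += (sample - srtt)/8 and sdrtt += (|err| - sdrtt)/4,
 * matching the 0.125 and 0.25 gains noted above.
 */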
2171 if (proct[req->r_procnum] == 0)
2172 panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
2173 t1 = req->r_rtt + 1;
2174 t1 -= (NFS_SRTT(req) >> 3);
2175 NFS_SRTT(req) += t1;
2176 if (t1 < 0)
2177 t1 = -t1;
2178 t1 -= (NFS_SDRTT(req) >> 2);
2179 NFS_SDRTT(req) += t1;
2180 }
2181 nmp->nm_timeouts = 0;
2182 lck_mtx_unlock(&nmp->nm_lock);
2183 /* signal anyone waiting on this request */
2184 wakeup(req);
2185 asyncioq = (req->r_callback.rcb_func != NULL);
2186 if (req->r_gss_ctx != NULL)
2187 nfs_gss_clnt_rpcdone(req);
2188 lck_mtx_unlock(&req->r_mtx);
2189 lck_mtx_unlock(nfs_request_mutex);
2190 /* if it's an async RPC with a callback, queue it up */
2191 if (asyncioq)
2192 nfs_asyncio_finish(req);
2193 break;
2194 }
2195
2196 if (!req) {
2197 /* not matched to a request, so drop it. */
2198 lck_mtx_unlock(nfs_request_mutex);
2199 OSAddAtomic(1, &nfsstats.rpcunexpected);
2200 mbuf_freem(mrep);
2201 }
2202 }
2203
2204 /*
2205 * Wait for the reply for a given request...
2206 * ...potentially resending the request if necessary.
2207 */
2208 int
2209 nfs_wait_reply(struct nfsreq *req)
2210 {
2211 struct timespec ts = { 2, 0 };
2212 int error = 0, slpflag;
2213
2214 if (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread)
2215 slpflag = PCATCH;
2216 else
2217 slpflag = 0;
2218
2219 lck_mtx_lock(&req->r_mtx);
2220 while (!req->r_nmrep.nmc_mhead) {
2221 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2222 break;
2223 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
2224 break;
2225 /* check if we need to resend */
2226 if (req->r_flags & R_MUSTRESEND) {
2227 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
2228 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
2229 req->r_flags |= R_SENDING;
2230 lck_mtx_unlock(&req->r_mtx);
2231 if (req->r_gss_ctx) {
2232 /*
2233 * It's an RPCSEC_GSS mount.
2234 * Can't just resend the original request
2235 * without bumping the cred sequence number.
2236 * Go back and re-build the request.
2237 */
2238 lck_mtx_lock(&req->r_mtx);
2239 req->r_flags &= ~R_SENDING;
2240 lck_mtx_unlock(&req->r_mtx);
2241 return (EAGAIN);
2242 }
2243 error = nfs_send(req, 1);
2244 lck_mtx_lock(&req->r_mtx);
2245 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
2246 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
2247 if (error)
2248 break;
2249 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
2250 break;
2251 }
2252 /* need to poll if we're P_NOREMOTEHANG */
2253 if (nfs_noremotehang(req->r_thread))
2254 ts.tv_sec = 1;
2255 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
2256 slpflag = 0;
2257 }
2258 lck_mtx_unlock(&req->r_mtx);
2259
2260 return (error);
2261 }
2262
2263 /*
2264 * An NFS request goes something like this:
2265 * (nb: always frees up mreq mbuf list)
2266 * nfs_request_create()
2267 * - allocates a request struct if one is not provided
2268 * - initial fill-in of the request struct
2269 * nfs_request_add_header()
2270 * - add the RPC header
2271 * nfs_request_send()
2272 * - link it into list
2273 * - call nfs_send() for first transmit
2274 * nfs_request_wait()
2275 * - call nfs_wait_reply() to wait for the reply
2276 * nfs_request_finish()
2277 * - break down rpc header and return with error or nfs reply
2278 * pointed to by nmrep.
2279 * nfs_request_rele()
2280 * nfs_request_destroy()
2281 * - clean up the request struct
2282 * - free the request struct if it was allocated by nfs_request_create()
2283 */
2284
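/*
 * Illustrative sketch (not part of the original source): a synchronous
 * caller strings the steps above together much as nfs_request2() does
 * further below.  Error handling and optional flags are pared down here.
 */
#if 0	/* example only */
static int
nfs_request_example(nfsnode_t np, struct nfsm_chain *nmrest, int procnum,
	thread_t thd, kauth_cred_t cred, struct nfsm_chain *nmrepp, int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, NULL, nmrest, procnum, thd, cred, &req)))
		return (error);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))	/* (re)build the RPC header */
			break;
		if ((error = nfs_request_send(req, 1)))		/* queue and transmit */
			break;
		nfs_request_wait(req);				/* wait for the reply */
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);			/* e.g. jukebox/GSS retry */
	nfs_request_rele(req);					/* final release destroys req */
	return (error);
}
#endif
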
2285 /*
2286 * Set up an NFS request struct (allocating if no request passed in).
2287 */
2288 int
2289 nfs_request_create(
2290 nfsnode_t np,
2291 mount_t mp, /* used only if !np */
2292 struct nfsm_chain *nmrest,
2293 int procnum,
2294 thread_t thd,
2295 kauth_cred_t cred,
2296 struct nfsreq **reqp)
2297 {
2298 struct nfsreq *req, *newreq = NULL;
2299 struct nfsmount *nmp;
2300
2301 req = *reqp;
2302 if (!req) {
2303 /* allocate a new NFS request structure */
2304 MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
2305 if (!newreq) {
2306 mbuf_freem(nmrest->nmc_mhead);
2307 nmrest->nmc_mhead = NULL;
2308 return (ENOMEM);
2309 }
2310 req = newreq;
2311 }
2312
2313 bzero(req, sizeof(*req));
2314 if (req == newreq)
2315 req->r_flags = R_ALLOCATED;
2316
2317 nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
2318 if (!nmp) {
2319 if (newreq)
2320 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
2321 return (ENXIO);
2322 }
2323 lck_mtx_lock(&nmp->nm_lock);
2324 if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
2325 (NFSSTA_FORCE|NFSSTA_TIMEO)) {
2326 lck_mtx_unlock(&nmp->nm_lock);
2327 mbuf_freem(nmrest->nmc_mhead);
2328 nmrest->nmc_mhead = NULL;
2329 if (newreq)
2330 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
2331 return (ENXIO);
2332 }
2333
2334 if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
2335 OSAddAtomic(1, &nfsstats.rpccnt[procnum]);
2336 if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
2337 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
2338
2339 lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
2340 req->r_nmp = nmp;
2341 req->r_np = np;
2342 req->r_thread = thd;
2343 if (IS_VALID_CRED(cred)) {
2344 kauth_cred_ref(cred);
2345 req->r_cred = cred;
2346 }
2347 req->r_procnum = procnum;
2348 if (proct[procnum] > 0)
2349 req->r_flags |= R_TIMING;
2350 req->r_nmrep.nmc_mhead = NULL;
2351 SLIST_INIT(&req->r_gss_seqlist);
2352 req->r_achain.tqe_next = NFSREQNOLIST;
2353 req->r_rchain.tqe_next = NFSREQNOLIST;
2354 req->r_cchain.tqe_next = NFSREQNOLIST;
2355
2356 lck_mtx_unlock(&nmp->nm_lock);
2357
2358 /* move the request mbuf chain to the nfsreq */
2359 req->r_mrest = nmrest->nmc_mhead;
2360 nmrest->nmc_mhead = NULL;
2361
2362 req->r_flags |= R_INITTED;
2363 req->r_refs = 1;
2364 if (newreq)
2365 *reqp = req;
2366 return (0);
2367 }
2368
2369 /*
2370 * Clean up and free an NFS request structure.
2371 */
2372 void
2373 nfs_request_destroy(struct nfsreq *req)
2374 {
2375 struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
2376 struct gss_seq *gsp, *ngsp;
2377 struct timespec ts = { 1, 0 };
2378 int clearjbtimeo = 0;
2379
2380 if (!req || !(req->r_flags & R_INITTED))
2381 return;
2382 req->r_flags &= ~R_INITTED;
2383 if (req->r_lflags & RL_QUEUED)
2384 nfs_reqdequeue(req);
2385 if (req->r_achain.tqe_next != NFSREQNOLIST) {
2386 /* still on an async I/O queue? */
2387 lck_mtx_lock(nfsiod_mutex);
2388 if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
2389 TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
2390 req->r_achain.tqe_next = NFSREQNOLIST;
2391 }
2392 lck_mtx_unlock(nfsiod_mutex);
2393 }
2394 lck_mtx_lock(&req->r_mtx);
2395 if (nmp) {
2396 lck_mtx_lock(&nmp->nm_lock);
2397 if (req->r_rchain.tqe_next != NFSREQNOLIST) {
2398 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
2399 req->r_rchain.tqe_next = NFSREQNOLIST;
2400 if (req->r_flags & R_RESENDQ)
2401 req->r_flags &= ~R_RESENDQ;
2402 }
2403 if (req->r_cchain.tqe_next != NFSREQNOLIST) {
2404 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
2405 req->r_cchain.tqe_next = NFSREQNOLIST;
2406 }
2407 if (req->r_flags & R_JBTPRINTFMSG) {
2408 req->r_flags &= ~R_JBTPRINTFMSG;
2409 nmp->nm_jbreqs--;
2410 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
2411 }
2412 lck_mtx_unlock(&nmp->nm_lock);
2413 }
2414 while (req->r_flags & R_RESENDQ)
2415 msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
2416 lck_mtx_unlock(&req->r_mtx);
2417 if (clearjbtimeo)
2418 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
2419 if (req->r_mhead)
2420 mbuf_freem(req->r_mhead);
2421 else if (req->r_mrest)
2422 mbuf_freem(req->r_mrest);
2423 if (req->r_nmrep.nmc_mhead)
2424 mbuf_freem(req->r_nmrep.nmc_mhead);
2425 if (IS_VALID_CRED(req->r_cred))
2426 kauth_cred_unref(&req->r_cred);
2427 if (req->r_gss_ctx)
2428 nfs_gss_clnt_rpcdone(req);
2429 SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
2430 FREE(gsp, M_TEMP);
2431 if (req->r_gss_ctx)
2432 nfs_gss_clnt_ctx_unref(req);
2433
2434 lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
2435 if (req->r_flags & R_ALLOCATED)
2436 FREE_ZONE(req, sizeof(*req), M_NFSREQ);
2437 }
2438
2439 void
2440 nfs_request_ref(struct nfsreq *req, int locked)
2441 {
2442 if (!locked)
2443 lck_mtx_lock(&req->r_mtx);
2444 if (req->r_refs <= 0)
2445 panic("nfsreq reference error");
2446 req->r_refs++;
2447 if (!locked)
2448 lck_mtx_unlock(&req->r_mtx);
2449 }
2450
2451 void
2452 nfs_request_rele(struct nfsreq *req)
2453 {
2454 int destroy;
2455
2456 lck_mtx_lock(&req->r_mtx);
2457 if (req->r_refs <= 0)
2458 panic("nfsreq reference underflow");
2459 req->r_refs--;
2460 destroy = (req->r_refs == 0);
2461 lck_mtx_unlock(&req->r_mtx);
2462 if (destroy)
2463 nfs_request_destroy(req);
2464 }
2465
2466
2467 /*
2468 * Add an (updated) RPC header with authorization to an NFS request.
2469 */
2470 int
2471 nfs_request_add_header(struct nfsreq *req)
2472 {
2473 struct nfsmount *nmp;
2474 int error = 0, auth_len = 0;
2475 mbuf_t m;
2476
2477 /* free up any previous header */
2478 if ((m = req->r_mhead)) {
2479 while (m && (m != req->r_mrest))
2480 m = mbuf_free(m);
2481 req->r_mhead = NULL;
2482 }
2483
2484 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
2485 if (!nmp)
2486 return (ENXIO);
2487
2488 if (!req->r_cred) /* RPCAUTH_NULL */
2489 auth_len = 0;
2490 else switch (nmp->nm_auth) {
2491 case RPCAUTH_UNIX:
2492 if (req->r_cred->cr_ngroups < 1)
2493 return (EINVAL);
2494 auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
2495 nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
2496 5 * NFSX_UNSIGNED;
2497 break;
2498 case RPCAUTH_KRB5:
2499 case RPCAUTH_KRB5I:
2500 case RPCAUTH_KRB5P:
2501 auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
2502 break;
2503 }
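/*
 * Illustrative note (not from the original source): for RPCAUTH_UNIX
 * the computation above sizes an AUTH_SYS credential - five 32-bit
 * words for the fixed fields (stamp, machine-name length, uid, gid,
 * group count, assuming nfsm_rpchead() emits an empty machine name)
 * plus four bytes for each auxiliary group, with the group list
 * (cr_ngroups - 1) clipped to the mount's nm_numgrps limit.
 */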
2504
2505 error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
2506 if (error)
2507 return (error);
2508
2509 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
2510 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
2511 if (!nmp)
2512 return (ENXIO);
2513 lck_mtx_lock(&nmp->nm_lock);
2514 if (nmp->nm_flag & NFSMNT_SOFT)
2515 req->r_retry = nmp->nm_retry;
2516 else
2517 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
2518 lck_mtx_unlock(&nmp->nm_lock);
2519
2520 return (error);
2521 }
2522
2523
2524 /*
2525 * Queue an NFS request up and send it out.
2526 */
2527 int
2528 nfs_request_send(struct nfsreq *req, int wait)
2529 {
2530 struct nfsmount *nmp;
2531 struct timeval now;
2532
2533 lck_mtx_lock(&req->r_mtx);
2534 req->r_flags |= R_SENDING;
2535 lck_mtx_unlock(&req->r_mtx);
2536
2537 lck_mtx_lock(nfs_request_mutex);
2538
2539 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
2540 if (!nmp) {
2541 lck_mtx_unlock(nfs_request_mutex);
2542 return (ENXIO);
2543 }
2544
2545 microuptime(&now);
2546 if (!req->r_start) {
2547 req->r_start = now.tv_sec;
2548 req->r_lastmsg = now.tv_sec -
2549 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
2550 }
2551
2552 OSAddAtomic(1, &nfsstats.rpcrequests);
2553
2554 /*
2555 * Chain request into list of outstanding requests. Be sure
2556 * to put it LAST so the timer finds the oldest requests first.
2557 * Make sure that the request queue timer is running
2558 * to check for possible request timeout.
2559 */
2560 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
2561 req->r_lflags |= RL_QUEUED;
2562 if (!nfs_request_timer_on) {
2563 nfs_request_timer_on = 1;
2564 nfs_interval_timer_start(nfs_request_timer_call,
2565 NFS_REQUESTDELAY);
2566 }
2567 lck_mtx_unlock(nfs_request_mutex);
2568
2569 /* Send the request... */
2570 return (nfs_send(req, wait));
2571 }
2572
2573 /*
2574 * Call nfs_wait_reply() to wait for the reply.
2575 */
2576 void
2577 nfs_request_wait(struct nfsreq *req)
2578 {
2579 req->r_error = nfs_wait_reply(req);
2580 }
2581
2582 /*
2583 * Finish up an NFS request by dequeueing it and
2584 * doing the initial NFS request reply processing.
2585 */
2586 int
2587 nfs_request_finish(
2588 struct nfsreq *req,
2589 struct nfsm_chain *nmrepp,
2590 int *status)
2591 {
2592 struct nfsmount *nmp;
2593 mbuf_t mrep;
2594 int verf_type = 0;
2595 uint32_t verf_len = 0;
2596 uint32_t reply_status = 0;
2597 uint32_t rejected_status = 0;
2598 uint32_t auth_status = 0;
2599 uint32_t accepted_status = 0;
2600 struct nfsm_chain nmrep;
2601 int error, auth, clearjbtimeo;
2602
2603 error = req->r_error;
2604
2605 if (nmrepp)
2606 nmrepp->nmc_mhead = NULL;
2607
2608 /* RPC done, unlink the request. */
2609 nfs_reqdequeue(req);
2610
2611 mrep = req->r_nmrep.nmc_mhead;
2612
2613 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
2614
2615 /*
2616 * Decrement the outstanding request count.
2617 */
2618 if ((req->r_flags & R_CWND) && nmp) {
2619 req->r_flags &= ~R_CWND;
2620 lck_mtx_lock(&nmp->nm_lock);
2621 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2622 nmp->nm_sent -= NFS_CWNDSCALE;
2623 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2624 /* congestion window is open, poke the cwnd queue */
2625 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2626 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2627 req2->r_cchain.tqe_next = NFSREQNOLIST;
2628 wakeup(req2);
2629 }
2630 lck_mtx_unlock(&nmp->nm_lock);
2631 }
2632
2633 if (req->r_gss_ctx) { // Using gss cred ?
2634 /*
2635 * If the request had an RPCSEC_GSS credential
2636 * then reset its sequence number bit in the
2637 * request window.
2638 */
2639 nfs_gss_clnt_rpcdone(req);
2640
2641 /*
2642 * If we need to re-send, go back and re-build the
2643 * request based on a new sequence number.
2644 * Note that we're using the original XID.
2645 */
2646 if (error == EAGAIN) {
2647 req->r_error = 0;
2648 if (mrep)
2649 mbuf_freem(mrep);
2650 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
2651 req->r_nmrep.nmc_mhead = NULL;
2652 req->r_flags |= R_RESTART;
2653 if (error == ENEEDAUTH) {
2654 req->r_xid = 0; // get a new XID
2655 error = 0;
2656 }
2657 goto nfsmout;
2658 }
2659 }
2660
2661 /*
2662 * If there was a successful reply, make sure to mark the mount as up.
2663 * If a tprintf message was given (or if this is a timed-out soft mount)
2664 * then post a tprintf message indicating the server is alive again.
2665 */
2666 if (!error) {
2667 if ((req->r_flags & R_TPRINTFMSG) ||
2668 (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
2669 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
2670 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
2671 else
2672 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
2673 }
2674 if (!error && !nmp)
2675 error = ENXIO;
2676 nfsmout_if(error);
2677
2678 /*
2679 * break down the RPC header and check if ok
2680 */
2681 nmrep = req->r_nmrep;
2682 nfsm_chain_get_32(error, &nmrep, reply_status);
2683 nfsmout_if(error);
2684 if (reply_status == RPC_MSGDENIED) {
2685 nfsm_chain_get_32(error, &nmrep, rejected_status);
2686 nfsmout_if(error);
2687 if (rejected_status == RPC_MISMATCH) {
2688 error = ENOTSUP;
2689 goto nfsmout;
2690 }
2691 nfsm_chain_get_32(error, &nmrep, auth_status);
2692 nfsmout_if(error);
2693 switch (auth_status) {
2694 case RPCSEC_GSS_CREDPROBLEM:
2695 case RPCSEC_GSS_CTXPROBLEM:
2696 /*
2697 * An RPCSEC_GSS cred or context problem.
2698 * We can't use it anymore.
2699 * Restore the args, renew the context
2700 * and set up for a resend.
2701 */
2702 error = nfs_gss_clnt_args_restore(req);
2703 if (error && error != ENEEDAUTH)
2704 break;
2705
2706 if (!error) {
2707 error = nfs_gss_clnt_ctx_renew(req);
2708 if (error)
2709 break;
2710 }
2711 mbuf_freem(mrep);
2712 req->r_nmrep.nmc_mhead = NULL;
2713 req->r_xid = 0; // get a new XID
2714 req->r_flags |= R_RESTART;
2715 goto nfsmout;
2716 default:
2717 error = EACCES;
2718 break;
2719 }
2720 goto nfsmout;
2721 }
2722
2723 /* Now check the verifier */
2724 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
2725 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
2726 nfsmout_if(error);
2727
2728 auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
2729 switch (auth) {
2730 case RPCAUTH_NULL:
2731 case RPCAUTH_UNIX:
2732 /* Any AUTH_UNIX verifier is ignored */
2733 if (verf_len > 0)
2734 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
2735 nfsm_chain_get_32(error, &nmrep, accepted_status);
2736 break;
2737 case RPCAUTH_KRB5:
2738 case RPCAUTH_KRB5I:
2739 case RPCAUTH_KRB5P:
2740 error = nfs_gss_clnt_verf_get(req, &nmrep,
2741 verf_type, verf_len, &accepted_status);
2742 break;
2743 }
2744 nfsmout_if(error);
2745
2746 switch (accepted_status) {
2747 case RPC_SUCCESS:
2748 if (req->r_procnum == NFSPROC_NULL) {
2749 /*
2750 * The NFS null procedure is unique
2751 * in that it does not return an NFS status.
2752 */
2753 *status = NFS_OK;
2754 } else {
2755 nfsm_chain_get_32(error, &nmrep, *status);
2756 nfsmout_if(error);
2757 }
2758
2759 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
2760 /*
2761 * It's a JUKEBOX error - delay and try again
2762 */
2763 int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
2764
2765 mbuf_freem(mrep);
2766 req->r_nmrep.nmc_mhead = NULL;
2767 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
2768 /* we're not yet completely mounted and */
2769 /* we can't complete an RPC, so we fail */
2770 OSAddAtomic(1, &nfsstats.rpctimeouts);
2771 nfs_softterm(req);
2772 error = req->r_error;
2773 goto nfsmout;
2774 }
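/*
 * Illustrative note (not from the original source): the delay
 * below starts at NFS_TRYLATERDEL seconds and doubles on each
 * successive NFSERR_TRYLATER reply, capped at 30 seconds - a
 * simple exponential backoff against a busy server.
 */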
2775 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
2776 if (req->r_delay > 30)
2777 req->r_delay = 30;
2778 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
2779 if (!(req->r_flags & R_JBTPRINTFMSG)) {
2780 req->r_flags |= R_JBTPRINTFMSG;
2781 lck_mtx_lock(&nmp->nm_lock);
2782 nmp->nm_jbreqs++;
2783 lck_mtx_unlock(&nmp->nm_lock);
2784 }
2785 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
2786 "resource temporarily unavailable (jukebox)");
2787 }
2788 if ((nmp->nm_flag & NFSMNT_SOFT) && (req->r_delay == 30)) {
2789 /* for soft mounts, just give up after a short while */
2790 OSAddAtomic(1, &nfsstats.rpctimeouts);
2791 nfs_softterm(req);
2792 error = req->r_error;
2793 goto nfsmout;
2794 }
2795 delay = req->r_delay;
2796 if (req->r_callback.rcb_func) {
2797 struct timeval now;
2798 microuptime(&now);
2799 req->r_resendtime = now.tv_sec + delay;
2800 } else {
2801 do {
2802 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2803 goto nfsmout;
2804 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
2805 } while (--delay > 0);
2806 }
2807 req->r_xid = 0; // get a new XID
2808 req->r_flags |= R_RESTART;
2809 req->r_start = 0;
2810 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
2811 return (0);
2812 }
2813
2814 if (req->r_flags & R_JBTPRINTFMSG) {
2815 req->r_flags &= ~R_JBTPRINTFMSG;
2816 lck_mtx_lock(&nmp->nm_lock);
2817 nmp->nm_jbreqs--;
2818 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
2819 lck_mtx_unlock(&nmp->nm_lock);
2820 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
2821 }
2822
2823 if (*status == NFS_OK) {
2824 /*
2825 * Successful NFS request
2826 */
2827 *nmrepp = nmrep;
2828 req->r_nmrep.nmc_mhead = NULL;
2829 break;
2830 }
2831 /* Got an NFS error of some kind */
2832
2833 /*
2834 * If the File Handle was stale, invalidate the
2835 * lookup cache, just in case.
2836 */
2837 if ((*status == ESTALE) && req->r_np)
2838 cache_purge(NFSTOV(req->r_np));
2839 if (nmp->nm_vers == NFS_VER2)
2840 mbuf_freem(mrep);
2841 else
2842 *nmrepp = nmrep;
2843 req->r_nmrep.nmc_mhead = NULL;
2844 error = 0;
2845 break;
2846 case RPC_PROGUNAVAIL:
2847 error = EPROGUNAVAIL;
2848 break;
2849 case RPC_PROGMISMATCH:
2850 error = ERPCMISMATCH;
2851 break;
2852 case RPC_PROCUNAVAIL:
2853 error = EPROCUNAVAIL;
2854 break;
2855 case RPC_GARBAGE:
2856 error = EBADRPC;
2857 break;
2858 case RPC_SYSTEM_ERR:
2859 default:
2860 error = EIO;
2861 break;
2862 }
2863 nfsmout:
2864 if (req->r_flags & R_JBTPRINTFMSG) {
2865 req->r_flags &= ~R_JBTPRINTFMSG;
2866 lck_mtx_lock(&nmp->nm_lock);
2867 nmp->nm_jbreqs--;
2868 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
2869 lck_mtx_unlock(&nmp->nm_lock);
2870 if (clearjbtimeo)
2871 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
2872 }
2873 FSDBG(273, R_XID32(req->r_xid), nmp, req,
2874 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
2875 return (error);
2876 }
2877
2878
2879 /*
2880 * Perform an NFS request synchronously.
2881 */
2882
2883 int
2884 nfs_request(
2885 nfsnode_t np,
2886 mount_t mp, /* used only if !np */
2887 struct nfsm_chain *nmrest,
2888 int procnum,
2889 vfs_context_t ctx,
2890 struct nfsm_chain *nmrepp,
2891 u_int64_t *xidp,
2892 int *status)
2893 {
2894 return nfs_request2(np, mp, nmrest, procnum,
2895 vfs_context_thread(ctx), vfs_context_ucred(ctx),
2896 0, nmrepp, xidp, status);
2897 }
2898
2899 int
2900 nfs_request2(
2901 nfsnode_t np,
2902 mount_t mp, /* used only if !np */
2903 struct nfsm_chain *nmrest,
2904 int procnum,
2905 thread_t thd,
2906 kauth_cred_t cred,
2907 int flags,
2908 struct nfsm_chain *nmrepp,
2909 u_int64_t *xidp,
2910 int *status)
2911 {
2912 struct nfsreq rq, *req = &rq;
2913 int error;
2914
2915 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
2916 return (error);
2917 req->r_flags |= (flags & R_OPTMASK);
2918
2919 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
2920 do {
2921 req->r_error = 0;
2922 req->r_flags &= ~R_RESTART;
2923 if ((error = nfs_request_add_header(req)))
2924 break;
2925 if (xidp)
2926 *xidp = req->r_xid;
2927 if ((error = nfs_request_send(req, 1)))
2928 break;
2929 nfs_request_wait(req);
2930 if ((error = nfs_request_finish(req, nmrepp, status)))
2931 break;
2932 } while (req->r_flags & R_RESTART);
2933
2934 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
2935 nfs_request_rele(req);
2936 return (error);
2937 }
2938
2939
2940 /*
2941 * Set up a new null proc request to exchange GSS context tokens with the
2942 * server. Associate the context that we are setting up with the request that we
2943 * are sending.
2944 */
2945
2946 int
2947 nfs_request_gss(
2948 mount_t mp,
2949 struct nfsm_chain *nmrest,
2950 thread_t thd,
2951 kauth_cred_t cred,
2952 int flags,
2953 struct nfs_gss_clnt_ctx *cp, /* Set to gss context to renew or setup */
2954 struct nfsm_chain *nmrepp,
2955 int *status)
2956 {
2957 struct nfsreq rq, *req = &rq;
2958 int error;
2959
2960 if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
2961 return (error);
2962 req->r_flags |= (flags & R_OPTMASK);
2963
2964 if (cp == NULL) {
2965 printf("nfs_request_gss request has no context\n");
2966 nfs_request_rele(req);
2967 return (NFSERR_EAUTH);
2968 }
2969 nfs_gss_clnt_ctx_ref(req, cp);
2970
2971 FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
2972 do {
2973 req->r_error = 0;
2974 req->r_flags &= ~R_RESTART;
2975 if ((error = nfs_request_add_header(req)))
2976 break;
2977
2978 if ((error = nfs_request_send(req, 1)))
2979 break;
2980 nfs_request_wait(req);
2981 if ((error = nfs_request_finish(req, nmrepp, status)))
2982 break;
2983 } while (req->r_flags & R_RESTART);
2984
2985 FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
2986 nfs_request_rele(req);
2987 return (error);
2988 }
2989
2990 /*
2991 * Create and start an asynchronous NFS request.
2992 */
2993 int
2994 nfs_request_async(
2995 nfsnode_t np,
2996 mount_t mp, /* used only if !np */
2997 struct nfsm_chain *nmrest,
2998 int procnum,
2999 thread_t thd,
3000 kauth_cred_t cred,
3001 struct nfsreq_cbinfo *cb,
3002 struct nfsreq **reqp)
3003 {
3004 struct nfsreq *req;
3005 int error, sent;
3006
3007 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
3008 req = *reqp;
3009 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
3010 if (error)
3011 return (error);
3012 req->r_flags |= R_ASYNC;
3013 if (cb)
3014 req->r_callback = *cb;
3015 error = nfs_request_add_header(req);
3016 if (!error) {
3017 req->r_flags |= R_WAITSENT;
3018 if (req->r_callback.rcb_func)
3019 nfs_request_ref(req, 0);
3020 error = nfs_request_send(req, 1);
3021 lck_mtx_lock(&req->r_mtx);
3022 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
3023 /* make sure to wait until this async I/O request gets sent */
3024 int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
3025 struct timespec ts = { 2, 0 };
3026 while (!(req->r_flags & R_SENT)) {
3027 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
3028 break;
3029 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
3030 slpflag = 0;
3031 }
3032 }
3033 sent = req->r_flags & R_SENT;
3034 lck_mtx_unlock(&req->r_mtx);
3035 if (error && req->r_callback.rcb_func && !sent)
3036 nfs_request_rele(req);
3037 }
3038 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
3039 if (error || req->r_callback.rcb_func)
3040 nfs_request_rele(req);
3041 return (error);
3042 }
3043
3044 /*
3045 * Wait for and finish an asynchronous NFS request.
3046 */
3047 int
3048 nfs_request_async_finish(
3049 struct nfsreq *req,
3050 struct nfsm_chain *nmrepp,
3051 u_int64_t *xidp,
3052 int *status)
3053 {
3054 int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
3055
3056 lck_mtx_lock(&req->r_mtx);
3057 if (!asyncio)
3058 req->r_flags |= R_ASYNCWAIT;
3059 while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */
3060 struct timespec ts = { 2, 0 };
3061 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
3062 break;
3063 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
3064 }
3065 lck_mtx_unlock(&req->r_mtx);
3066
3067 if (!error) {
3068 nfs_request_wait(req);
3069 error = nfs_request_finish(req, nmrepp, status);
3070 }
3071
3072 while (!error && (req->r_flags & R_RESTART)) {
3073 if (asyncio && req->r_resendtime) { /* send later */
3074 lck_mtx_lock(&req->r_mtx);
3075 nfs_asyncio_resend(req);
3076 lck_mtx_unlock(&req->r_mtx);
3077 return (EINPROGRESS);
3078 }
3079 req->r_error = 0;
3080 req->r_flags &= ~R_RESTART;
3081 if ((error = nfs_request_add_header(req)))
3082 break;
3083 if ((error = nfs_request_send(req, !asyncio)))
3084 break;
3085 if (asyncio)
3086 return (EINPROGRESS);
3087 nfs_request_wait(req);
3088 if ((error = nfs_request_finish(req, nmrepp, status)))
3089 break;
3090 }
3091 if (xidp)
3092 *xidp = req->r_xid;
3093
3094 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
3095 nfs_request_rele(req);
3096 return (error);
3097 }
3098
3099 /*
3100 * Cancel a pending asynchronous NFS request.
3101 */
3102 void
3103 nfs_request_async_cancel(struct nfsreq *req)
3104 {
3105 nfs_reqdequeue(req);
3106 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
3107 nfs_request_rele(req);
3108 }
3109
3110 /*
3111 * Flag a request as being terminated.
3112 */
3113 void
3114 nfs_softterm(struct nfsreq *req)
3115 {
3116 struct nfsmount *nmp = req->r_nmp;
3117 req->r_flags |= R_SOFTTERM;
3118 req->r_error = ETIMEDOUT;
3119 if (!(req->r_flags & R_CWND) || !nmp)
3120 return;
3121 /* update congestion window */
3122 req->r_flags &= ~R_CWND;
3123 lck_mtx_lock(&nmp->nm_lock);
3124 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
3125 nmp->nm_sent -= NFS_CWNDSCALE;
3126 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
3127 /* congestion window is open, poke the cwnd queue */
3128 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
3129 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
3130 req2->r_cchain.tqe_next = NFSREQNOLIST;
3131 wakeup(req2);
3132 }
3133 lck_mtx_unlock(&nmp->nm_lock);
3134 }
3135
3136 /*
3137 * Ensure req isn't in use by the timer, then dequeue it.
3138 */
3139 void
3140 nfs_reqdequeue(struct nfsreq *req)
3141 {
3142 lck_mtx_lock(nfs_request_mutex);
3143 while (req->r_lflags & RL_BUSY) {
3144 req->r_lflags |= RL_WAITING;
3145 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
3146 }
3147 if (req->r_lflags & RL_QUEUED) {
3148 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
3149 req->r_lflags &= ~RL_QUEUED;
3150 }
3151 lck_mtx_unlock(nfs_request_mutex);
3152 }
3153
3154 /*
3155 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
3156 * free()'d out from under it.
3157 */
3158 void
3159 nfs_reqbusy(struct nfsreq *req)
3160 {
3161 if (req->r_lflags & RL_BUSY)
3162 panic("req locked");
3163 req->r_lflags |= RL_BUSY;
3164 }
3165
3166 /*
3167 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
3168 */
3169 struct nfsreq *
3170 nfs_reqnext(struct nfsreq *req)
3171 {
3172 struct nfsreq * nextreq;
3173
3174 if (req == NULL)
3175 return (NULL);
3176 /*
3177 * We need to get and busy the next req before signalling the
3178 * current one, otherwise wakeup() may block us and we'll race to
3179 * grab the next req.
3180 */
3181 nextreq = TAILQ_NEXT(req, r_chain);
3182 if (nextreq != NULL)
3183 nfs_reqbusy(nextreq);
3184 /* unbusy and signal. */
3185 req->r_lflags &= ~RL_BUSY;
3186 if (req->r_lflags & RL_WAITING) {
3187 req->r_lflags &= ~RL_WAITING;
3188 wakeup(&req->r_lflags);
3189 }
3190 return (nextreq);
3191 }
3192
3193 /*
3194 * NFS request queue timer routine
3195 *
3196 * Scan the NFS request queue for any requests that have timed out.
3197 *
3198 * Alert the system to unresponsive servers.
3199 * Mark expired requests on soft mounts as terminated.
3200 * For UDP, mark/signal requests for retransmission.
3201 */
3202 void
3203 nfs_request_timer(__unused void *param0, __unused void *param1)
3204 {
3205 struct nfsreq *req;
3206 struct nfsmount *nmp;
3207 int timeo, maxtime, finish_asyncio, error;
3208 struct timeval now;
3209 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
3210
3211 lck_mtx_lock(nfs_request_mutex);
3212 req = TAILQ_FIRST(&nfs_reqq);
3213 if (req == NULL) { /* no requests - turn timer off */
3214 nfs_request_timer_on = 0;
3215 lck_mtx_unlock(nfs_request_mutex);
3216 return;
3217 }
3218
3219 nfs_reqbusy(req);
3220 TAILQ_INIT(&nfs_mount_poke_queue);
3221
3222 microuptime(&now);
3223 for ( ; req != NULL ; req = nfs_reqnext(req)) {
3224 nmp = req->r_nmp;
3225 if (!nmp) /* unmounted */
3226 continue;
3227 if (req->r_error || req->r_nmrep.nmc_mhead)
3228 continue;
3229 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
3230 if (req->r_callback.rcb_func != NULL) {
3231 /* async I/O RPC needs to be finished */
3232 lck_mtx_lock(&req->r_mtx);
3233 req->r_error = error;
3234 finish_asyncio = !(req->r_flags & R_WAITSENT);
3235 wakeup(req);
3236 lck_mtx_unlock(&req->r_mtx);
3237 if (finish_asyncio)
3238 nfs_asyncio_finish(req);
3239 }
3240 continue;
3241 }
3242
3243 lck_mtx_lock(&req->r_mtx);
3244
3245 if (nmp->nm_tprintf_initial_delay &&
3246 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
3247 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
3248 req->r_lastmsg = now.tv_sec;
3249 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
3250 "not responding");
3251 req->r_flags |= R_TPRINTFMSG;
3252 lck_mtx_lock(&nmp->nm_lock);
3253 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
3254 lck_mtx_unlock(&nmp->nm_lock);
3255 /* we're not yet completely mounted and */
3256 /* we can't complete an RPC, so we fail */
3257 OSAddAtomic(1, &nfsstats.rpctimeouts);
3258 nfs_softterm(req);
3259 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
3260 wakeup(req);
3261 lck_mtx_unlock(&req->r_mtx);
3262 if (finish_asyncio)
3263 nfs_asyncio_finish(req);
3264 continue;
3265 }
3266 lck_mtx_unlock(&nmp->nm_lock);
3267 }
3268
3269 /*
3270 * Put a reasonable limit on the maximum timeout,
3271 * and reduce that limit when soft mounts get timeouts or are in reconnect.
3272 */
3273 if (!(nmp->nm_flag & NFSMNT_SOFT))
3274 maxtime = NFS_MAXTIMEO;
3275 else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
3276 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
3277 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
3278 else
3279 maxtime = NFS_MINTIMEO/4;
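/*
 * Illustrative note (not from the original source): for a soft mount
 * the middle case works out to NFS_MAXTIMEO / (2 * (nm_timeouts + 1)),
 * so each accumulated timeout tightens the cap further; once the mount
 * has been stuck in reconnect for 8 seconds or more, the cap drops to
 * NFS_MINTIMEO/4 so stuck requests terminate quickly.
 */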
3280
3281 /*
3282 * Check for request timeout.
3283 */
3284 if (req->r_rtt >= 0) {
3285 req->r_rtt++;
3286 lck_mtx_lock(&nmp->nm_lock);
3287 if (req->r_flags & R_RESENDERR) {
3288 /* with resend errors, retry every few seconds */
3289 timeo = 4*hz;
3290 } else {
3291 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
3292 timeo = NFS_MINIDEMTIMEO; // gss context setup
3293 else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
3294 timeo = nmp->nm_timeo;
3295 else
3296 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
3297
3298 /* ensure 62.5 ms floor */
3299 while (16 * timeo < hz)
3300 timeo *= 2;
3301 if (nmp->nm_timeouts > 0)
3302 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
3303 }
3304 /* limit timeout to max */
3305 if (timeo > maxtime)
3306 timeo = maxtime;
3307 if (req->r_rtt <= timeo) {
3308 lck_mtx_unlock(&nmp->nm_lock);
3309 lck_mtx_unlock(&req->r_mtx);
3310 continue;
3311 }
3312 /* The request has timed out */
3313 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
3314 req->r_procnum, proct[req->r_procnum],
3315 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
3316 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
3317 if (nmp->nm_timeouts < 8)
3318 nmp->nm_timeouts++;
3319 nfs_mount_check_dead_timeout(nmp);
3320 /* if it's been a few seconds, try poking the socket */
3321 if ((nmp->nm_sotype == SOCK_STREAM) &&
3322 ((now.tv_sec - req->r_start) >= 3) &&
3323 !(nmp->nm_sockflags & NMSOCK_POKE)) {
3324 nmp->nm_sockflags |= NMSOCK_POKE;
3325 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
3326 }
3327 lck_mtx_unlock(&nmp->nm_lock);
3328 }
3329
3330 /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
3331 if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
3332 ((req->r_rexmit >= req->r_retry) || /* too many */
3333 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
3334 OSAddAtomic(1, &nfsstats.rpctimeouts);
3335 lck_mtx_lock(&nmp->nm_lock);
3336 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
3337 lck_mtx_unlock(&nmp->nm_lock);
3338 /* make sure we note the unresponsive server */
3339 /* (maxtime may be less than tprintf delay) */
3340 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
3341 "not responding");
3342 req->r_lastmsg = now.tv_sec;
3343 req->r_flags |= R_TPRINTFMSG;
3344 } else {
3345 lck_mtx_unlock(&nmp->nm_lock);
3346 }
3347 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
3348 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
3349 now.tv_sec - req->r_start));
3350 nfs_softterm(req);
3351 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
3352 wakeup(req);
3353 lck_mtx_unlock(&req->r_mtx);
3354 if (finish_asyncio)
3355 nfs_asyncio_finish(req);
3356 continue;
3357 }
3358
3359 /* for TCP, only resend if explicitly requested */
3360 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
3361 if (++req->r_rexmit > NFS_MAXREXMIT)
3362 req->r_rexmit = NFS_MAXREXMIT;
3363 req->r_rtt = 0;
3364 lck_mtx_unlock(&req->r_mtx);
3365 continue;
3366 }
3367
3368 /*
3369 * The request needs to be (re)sent. Kick the requester to resend it.
3370 * (unless it's already marked as needing a resend)
3371 */
3372 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
3373 lck_mtx_unlock(&req->r_mtx);
3374 continue;
3375 }
3376 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
3377 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
3378 req->r_flags |= R_MUSTRESEND;
3379 req->r_rtt = -1;
3380 wakeup(req);
3381 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
3382 nfs_asyncio_resend(req);
3383 lck_mtx_unlock(&req->r_mtx);
3384 }
3385
3386 lck_mtx_unlock(nfs_request_mutex);
3387
3388 /* poke any sockets */
3389 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
3390 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
3391 nfs_sock_poke(nmp);
3392 lck_mtx_lock(&nmp->nm_lock);
3393 nmp->nm_sockflags &= ~NMSOCK_POKE;
3394 if (!(nmp->nm_state & NFSSTA_MOUNTED))
3395 wakeup(&nmp->nm_sockflags);
3396 lck_mtx_unlock(&nmp->nm_lock);
3397 }
3398
3399 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
3400 }
3401
3402 /*
3403 * check a thread's proc for the "noremotehang" flag.
3404 */
3405 int
3406 nfs_noremotehang(thread_t thd)
3407 {
3408 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
3409 return (p && proc_noremotehang(p));
3410 }
3411
3412 /*
3413 * Test for a termination condition pending on the process.
3414 * This is used to determine if we need to bail on a mount.
3415 * ETIMEDOUT is returned if there has been a soft timeout.
3416 * EINTR is returned if there is a signal pending that is not being ignored
3417 * and the mount is interruptible, or if we are a thread that is in the process
3418 * of cancellation (also SIGKILL posted).
3419 */
3420 int
3421 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
3422 {
3423 proc_t p;
3424 int error = 0;
3425
3426 if (nmp == NULL)
3427 return (ENXIO);
3428
3429 if (req && (req->r_flags & R_SOFTTERM))
3430 return (ETIMEDOUT); /* request has been terminated. */
3431
3432 /*
3433 * If we're in the process of a force unmount and there's
3434 * been a timeout, we're dead and fail IO.
3435 */
3436 if (!nmplocked)
3437 lck_mtx_lock(&nmp->nm_lock);
3438 if ((nmp->nm_state & NFSSTA_FORCE) &&
3439 (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
3440 error = EIO;
3441 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
3442 /* Someone is unmounting us, go soft and mark it. */
3443 nmp->nm_flag |= NFSMNT_SOFT;
3444 nmp->nm_state |= NFSSTA_FORCE;
3445 }
3446
3447 /* Check if the mount is marked dead. */
3448 if (!error && (nmp->nm_state & NFSSTA_DEAD))
3449 error = ENXIO;
3450
3451 /*
3452 * If the mount is hung and we've requested not to hang
3453 * on remote filesystems, then bail now.
3454 */
3455 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
3456 error = EIO;
3457
3458 if (!nmplocked)
3459 lck_mtx_unlock(&nmp->nm_lock);
3460 if (error)
3461 return (error);
3462
3463 /* may not have a thread for async I/O */
3464 if (thd == NULL)
3465 return (0);
3466
3467 /* If this thread belongs to the kernel task, the abort check is not needed */
3468 if ((current_proc() != kernproc) && current_thread_aborted())
3469 return (EINTR);
3470
3471 /* mask off thread and process blocked signals. */
3472 if ((nmp->nm_flag & NFSMNT_INT) && ((p = get_bsdthreadtask_info(thd))) &&
3473 proc_pendingsignals(p, NFSINT_SIGMASK))
3474 return (EINTR);
3475 return (0);
3476 }
3477
3478 /*
3479 * Lock a socket against others.
3480 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
3481 * and also to avoid race conditions between the processes with nfs requests
3482 * in progress when a reconnect is necessary.
3483 */
3484 int
3485 nfs_sndlock(struct nfsreq *req)
3486 {
3487 struct nfsmount *nmp = req->r_nmp;
3488 int *statep;
3489 int error = 0, slpflag = 0;
3490 struct timespec ts = { 0, 0 };
3491
3492 if (nmp == NULL)
3493 return (ENXIO);
3494
3495 lck_mtx_lock(&nmp->nm_lock);
3496 statep = &nmp->nm_state;
3497
3498 if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
3499 slpflag = PCATCH;
3500 while (*statep & NFSSTA_SNDLOCK) {
3501 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
3502 break;
3503 *statep |= NFSSTA_WANTSND;
3504 if (nfs_noremotehang(req->r_thread))
3505 ts.tv_sec = 1;
3506 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
3507 if (slpflag == PCATCH) {
3508 slpflag = 0;
3509 ts.tv_sec = 2;
3510 }
3511 }
3512 if (!error)
3513 *statep |= NFSSTA_SNDLOCK;
3514 lck_mtx_unlock(&nmp->nm_lock);
3515 return (error);
3516 }
3517
3518 /*
3519 * Unlock the stream socket for others.
3520 */
3521 void
3522 nfs_sndunlock(struct nfsreq *req)
3523 {
3524 struct nfsmount *nmp = req->r_nmp;
3525 int *statep, wake = 0;
3526
3527 if (nmp == NULL)
3528 return;
3529 lck_mtx_lock(&nmp->nm_lock);
3530 statep = &nmp->nm_state;
3531 if ((*statep & NFSSTA_SNDLOCK) == 0)
3532 panic("nfs sndunlock");
3533 *statep &= ~NFSSTA_SNDLOCK;
3534 if (*statep & NFSSTA_WANTSND) {
3535 *statep &= ~NFSSTA_WANTSND;
3536 wake = 1;
3537 }
3538 lck_mtx_unlock(&nmp->nm_lock);
3539 if (wake)
3540 wakeup(statep);
3541 }
3542
3543 int
3544 nfs_aux_request(
3545 struct nfsmount *nmp,
3546 thread_t thd,
3547 struct sockaddr_in *saddr,
3548 mbuf_t mreq,
3549 uint32_t xid,
3550 int bindresv,
3551 int timeo,
3552 struct nfsm_chain *nmrep)
3553 {
3554 int error = 0, on = 1, try, sendat = 2;
3555 socket_t so = NULL;
3556 struct sockaddr_in sin;
3557 struct timeval tv = { 1, 0 };
3558 mbuf_t m, mrep = NULL;
3559 struct msghdr msg;
3560 uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
3561 uint32_t verf_type, verf_len, accepted_status;
3562 size_t readlen;
3563
3564 /* create socket and set options */
3565 if (((error = sock_socket(saddr->sin_family, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, &so))) ||
3566 ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
3567 ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
3568 ((error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
3569 goto nfsmout;
3570 if (bindresv) {
3571 int portrange = IP_PORTRANGE_LOW;
3572 error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
3573 nfsmout_if(error);
3574 /* bind now to check for failure */
3575 sin.sin_len = sizeof (struct sockaddr_in);
3576 sin.sin_family = AF_INET;
3577 sin.sin_addr.s_addr = INADDR_ANY;
3578 sin.sin_port = 0;
3579 error = sock_bind(so, (struct sockaddr *) &sin);
3580 nfsmout_if(error);
3581 }
3582
	for (try = 0; try < timeo; try++) {
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			break;
		if (!try || (try == sendat)) {
			/* send the request (resending periodically) */
			if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
				goto nfsmout;
			bzero(&msg, sizeof(msg));
			msg.msg_name = saddr;
			msg.msg_namelen = saddr->sin_len;
			if ((error = sock_sendmbuf(so, &msg, m, 0, NULL)))
				goto nfsmout;
			sendat *= 2;
			if (sendat > 30)
				sendat = 30;
		}
		/* wait for the response */
		readlen = 1<<18;
		bzero(&msg, sizeof(msg));
		error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
		if (error == EWOULDBLOCK)
			continue;
		nfsmout_if(error);
		/* parse the response */
		nfsm_chain_dissect_init(error, nmrep, mrep);
		nfsm_chain_get_32(error, nmrep, rxid);
		nfsm_chain_get_32(error, nmrep, reply);
		nfsmout_if(error);
		if ((rxid != xid) || (reply != RPC_REPLY))
			error = EBADRPC;
		nfsm_chain_get_32(error, nmrep, reply_status);
		nfsmout_if(error);
		if (reply_status == RPC_MSGDENIED) {
			nfsm_chain_get_32(error, nmrep, rejected_status);
			nfsmout_if(error);
			error = (rejected_status == RPC_MISMATCH) ? ENOTSUP : EACCES;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
		nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
		nfsmout_if(error);
		if (verf_len)
			nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, nmrep, accepted_status);
		nfsm_assert(error, (accepted_status == RPC_SUCCESS), EIO);
		break;
	}
nfsmout:
	if (so) {
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	}
	mbuf_freem(mreq);
	return (error);
}

int
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}
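
/*
 * For example (editorial note): nfs_msg(thd, "host", "not responding",
 * ETIMEDOUT) would report "nfs server host: not responding, error 60"
 * on the calling process's terminal, and in the system log, via tprintf().
 */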

void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	uint32_t do_vfs_signal;
	struct timeval now;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	if (unresponsive && (nmp->nm_flag & NFSMNT_DEADTIMEOUT)) {
		microuptime(&now);
		if (!wasunresponsive) {
			nmp->nm_deadto_start = now.tv_sec;
			nfs_mount_sock_thread_wake(nmp);
		} else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_deadtimeout) {
			if (!(nmp->nm_state & NFSSTA_DEAD))
				printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			nmp->nm_state |= NFSSTA_DEAD;
		}
	}
	lck_mtx_unlock(&nmp->nm_lock);

	if (nmp->nm_state & NFSSTA_DEAD)
		do_vfs_signal = VQ_DEAD;
	else if (softnobrowse || wasunresponsive || !unresponsive)
		do_vfs_signal = 0;
	else
		do_vfs_signal = VQ_NOTRESP;
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}

void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	int do_vfs_signal;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	if (nmp->nm_deadto_start)
		nmp->nm_deadto_start = 0;
	lck_mtx_unlock(&nmp->nm_lock);

	if (softnobrowse)
		do_vfs_signal = 0;
	else
		do_vfs_signal = (wasunresponsive && !unresponsive);
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}
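
/*
 * Illustrative pairing (an editorial sketch, not code from this file):
 * a caller that detects an RPC timeout marks the mount unresponsive,
 * then clears the same flag once the server answers again.
 */
#if 0
	nfs_down(nmp, thd, error, NFSSTA_TIMEO, "not responding");
	/* ... server eventually replies ... */
	nfs_up(nmp, thd, NFSSTA_TIMEO, "is alive again");
#endif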


#endif /* NFSCLIENT */

#if NFSSERVER

/*
 * Generate the RPC reply header.
 * The siz arg is used to decide whether adding a cluster is worthwhile.
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_int32_t *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster; otherwise
	 * try to leave leading space for the lower-level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, &nfsstats.srvrpc_errs);
	return (0);
}
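
/*
 * For reference (editorial note), the accepted-reply header built above
 * follows the ONC RPC layout (RFC 5531):
 *
 *	xid, msg type (RPC_REPLY), reply status (RPC_MSGACCEPTED),
 *	verifier flavor, verifier length[, verifier body],
 *	accepted status (RPC_SUCCESS, RPC_PROGUNAVAIL, RPC_PROGMISMATCH,
 *	RPC_PROCUNAVAIL, RPC_GARBAGE, or RPC_SYSTEM_ERR)
 *
 * while a denied reply carries RPC_MSGDENIED followed by either an auth
 * error code or the range of supported RPC versions.
 */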

/*
 * The NFS server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection-based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE; /* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
	    error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking; else punt, and it will
 * be called again with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag = 0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test how the nfsds handle this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
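	/*
	 * Two cases below: for stream sockets, append incoming data to the
	 * raw byte queue (ns_raw) and let nfsrv_getstream() carve complete
	 * RPC records out of it; for datagram sockets, each mbuf chain
	 * received is already a complete record and is queued directly on
	 * ns_rec (preceded by an mbuf holding the sender's address).
	 */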
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an(other) nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try to parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		do {
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try to process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}

/*
 * Try to extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length RPCs.
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
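
/*
 * For reference (editorial note), the 4-byte record mark parsed above is
 * the standard RPC record-marking word for stream transports (RFC 5531):
 * the high bit flags the last fragment of a record and the low 31 bits
 * give the fragment length.  A minimal decode looks like:
 */
#if 0
	u_int32_t mark = ntohl(recmark);		/* wire value is big-endian */
	int lastfrag = (mark & 0x80000000) != 0;	/* last fragment of the record? */
	u_int32_t fraglen = mark & 0x7fffffff;		/* fragment length in bytes */
#endif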

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
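	/*
	 * For datagram sockets, nfsrv_rcv_locked() queued the sender's
	 * address as a leading MBUF_TYPE_SONAME mbuf; split it off here
	 * and keep it as nd_nam2 so the reply can be addressed.
	 */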
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val); // RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val); // RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers); // NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum); // NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
	nfsm_chain_get_32(error, nmreq, len); // Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
		nfsm_chain_get_32(error, nmreq, len); // hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len); // extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len); // verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout; // drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}
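
/*
 * For reference (editorial note), the AUTH_UNIX (AUTH_SYS) credential
 * body consumed above is laid out as (RFC 5531):
 *
 *	stamp, machinename<string>, uid, gid, extra-gid count, extra gids
 *
 * followed by the verifier (flavor and length), which is expected to be
 * AUTH_NONE and is skipped above.
 */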

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */