/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

#if NFSCLIENT

static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void*, int);
static void	nfs_tcp_rcv(socket_t, void*, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
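/*
 * nm_srtt[] holds the smoothed rtt mean scaled by 8 (srtt = 8A) and
 * nm_sdrtt[] the smoothed deviation scaled by 4 (sdrtt = 4D), as maintained
 * by the update code in nfs_request_match_reply() below.  Substituting into
 * NFS_RTO (and ignoring the rounding constants):
 *	timers 1,2:	((srtt >> 2) + sdrtt) >> 1 = (2A + 4D) / 2 = A + 2D
 *	timers 3,4:	(srtt >> 3) + sdrtt        =  A + 4D
 * which is where the A+2D and A+4D figures above come from.
 */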

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
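/*
 * (With the standard NFSv3 procedure numbering this gives, for example,
 * proct[NFSPROC_GETATTR] == 1 (ACCESS shares the getattr timer),
 * proct[NFSPROC_LOOKUP] == 2, proct[NFSPROC_READ] == 3 (shared by
 * READLINK, READDIR, and READDIRPLUS), and proct[NFSPROC_WRITE] == 4;
 * everything else falls back to the default timer.)
 */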

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, "Congestion Avoidance and Control", In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
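/*
 * (In these scaled units one in-flight RPC costs NFS_CWNDSCALE (256), so
 * NFS_MAXCWND allows at most 32 outstanding requests, and the initial
 * window of NFS_MAXCWND / 2 set in nfs_connect() allows 16.  The additive
 * increase in nfs_request_match_reply() adds roughly
 * NFS_CWNDSCALE*NFS_CWNDSCALE/cwnd per reply, i.e. about one request slot
 * per full window of replies.)
 */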

/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
			nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}
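	/*
	 * (With IPPORT_RESERVED == 1024, the bind loop above walks the
	 * privileged ports from 1023 down to 513 before giving up.)
	 */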

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15) /* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			if (tocnt > 60) {
				/* abort if this is taking too long */
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}
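	/*
	 * (Each pass above sleeps for at most 2 seconds, so the warning
	 * fires after roughly 30 seconds and the connect attempt is
	 * abandoned after roughly 2 minutes.)
	 */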

	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}
	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	/* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		nfs_disconnect(nmp);
	return (error);
}

/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
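		/*
		 * (The NULL procedure carries no arguments and returns no
		 * results, so an empty request body is all that's needed;
		 * any successful round trip proves the socket works.)
		 */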
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}

/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
static void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		} else if (force)
			break;
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}

/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}

/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
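	/*
	 * (Each record fragment is preceded by a 4-byte marker: the high
	 * bit flags the last fragment of a record and the low 31 bits give
	 * the fragment length -- hence the 0x80000000 handling below.)
	 */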
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}

/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match is found, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
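			/*
			 * (Worked example: with 16 requests outstanding,
			 * nm_cwnd == 16*256 == 4096, so each reply adds
			 * (65536 + 2048) / 4096 == 16, i.e. 1/16 of a
			 * request slot -- one full slot per window of
			 * replies, the classic additive increase.)
			 */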
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
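		/*
		 * (This is Jacobson's update with NFS_SRTT scaled by 8 and
		 * NFS_SDRTT scaled by 4: srtt += sample - srtt/8 converges
		 * on 8 times the mean rtt, and sdrtt += |err| - sdrtt/4 on
		 * 4 times the mean deviation, matching the 0.125 and 0.25
		 * gains noted above.)
		 */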
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
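/*
 * (nfs_request2() below is the canonical synchronous caller and walks
 * through exactly this create/add_header/send/wait/finish sequence.)
 */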

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
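		/*
		 * (With NFSX_UNSIGNED == 4: four bytes per supplementary
		 * group -- capped at nm_numgrps -- plus five 4-byte words
		 * of fixed AUTH_UNIX overhead.  E.g. 3 supplementary
		 * groups gives 12 + 20 = 32 bytes.)
		 */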
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}

/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (req->r_flags & R_CWND) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			if (mrep)
				mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;

			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
1838 if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
1839 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
1840 "resource temporarily unavailable (jukebox)");
1841 req->r_flags |= R_JBTPRINTFMSG;
1842 }
1843 delay = req->r_delay;
1844 if (req->r_callback.rcb_func) {
1845 struct timeval now;
1846 microuptime(&now);
1847 req->r_resendtime = now.tv_sec + delay;
1848 } else {
1849 do {
1850 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
1851 return (error);
1852 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
1853 } while (--delay > 0);
1854 }
1855 req->r_xid = 0; // get a new XID
1856 req->r_flags |= R_RESTART;
1857 req->r_start = 0;
1858 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
1859 return (0);
1860 }
1861
1862 if (req->r_flags & R_JBTPRINTFMSG)
1863 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");
1864
1865 if (*status == NFS_OK) {
1866 /*
1867 * Successful NFS request
1868 */
1869 *nmrepp = nmrep;
1870 req->r_nmrep.nmc_mhead = NULL;
1871 break;
1872 }
1873 /* Got an NFS error of some kind */
1874
1875 /*
1876 * If the File Handle was stale, invalidate the
1877 * lookup cache, just in case.
1878 */
1879 if ((*status == ESTALE) && req->r_np)
1880 cache_purge(NFSTOV(req->r_np));
1881 if (nmp->nm_vers == NFS_VER2)
1882 mbuf_freem(mrep);
1883 else
1884 *nmrepp = nmrep;
1885 req->r_nmrep.nmc_mhead = NULL;
1886 error = 0;
1887 break;
1888 case RPC_PROGUNAVAIL:
1889 error = EPROGUNAVAIL;
1890 break;
1891 case RPC_PROGMISMATCH:
1892 error = ERPCMISMATCH;
1893 break;
1894 case RPC_PROCUNAVAIL:
1895 error = EPROCUNAVAIL;
1896 break;
1897 case RPC_GARBAGE:
1898 error = EBADRPC;
1899 break;
1900 case RPC_SYSTEM_ERR:
1901 default:
1902 error = EIO;
1903 break;
1904 }
1905 nfsmout:
1906 if (!error && (req->r_flags & R_JBTPRINTFMSG))
1907 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
1908 FSDBG(273, R_XID32(req->r_xid), nmp, req,
1909 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
1910 return (error);
1911 }
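
/*
 * Hedged sketch (not part of the original source): the NFSERR_TRYLATER
 * handling above backs off by doubling r_delay, starting from
 * NFS_TRYLATERDEL and capping at 30 seconds.  A minimal standalone model
 * of that progression (hypothetical helper, example only):
 */
#if 0	/* example only */
static int
example_jukebox_next_delay(int r_delay)
{
	int delay = !r_delay ? NFS_TRYLATERDEL : (r_delay * 2);
	if (delay > 30)
		delay = 30;	/* never delay more than 30 seconds */
	return (delay);
}
#endif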
1912
1913
1914 /*
1915 * Perform an NFS request synchronously.
1916 */
1917
1918 int
1919 nfs_request(
1920 nfsnode_t np,
1921 mount_t mp, /* used only if !np */
1922 struct nfsm_chain *nmrest,
1923 int procnum,
1924 vfs_context_t ctx,
1925 struct nfsm_chain *nmrepp,
1926 u_int64_t *xidp,
1927 int *status)
1928 {
1929 return nfs_request2(np, mp, nmrest, procnum,
1930 vfs_context_thread(ctx), vfs_context_ucred(ctx),
1931 0, nmrepp, xidp, status);
1932 }
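
/*
 * Hedged usage sketch (not part of the original source; the argument
 * marshalling is elided and the helper below is hypothetical): a typical
 * synchronous caller builds an mbuf chain for the arguments, fires the
 * RPC, then dissects the reply:
 */
#if 0	/* example only */
static int
example_getattr_rpc(nfsnode_t np, int nfsvers, vfs_context_t ctx)
{
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	int error = 0, status = 0;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);
	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
	/* ... marshal the file handle and any other arguments ... */
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC_GETATTR, ctx,
	    &nmrep, &xid, &status);
	/* ... dissect nmrep ... */
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error ? error : status);
}
#endif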
1933
1934 int
1935 nfs_request2(
1936 nfsnode_t np,
1937 mount_t mp, /* used only if !np */
1938 struct nfsm_chain *nmrest,
1939 int procnum,
1940 thread_t thd,
1941 kauth_cred_t cred,
1942 int flags,
1943 struct nfsm_chain *nmrepp,
1944 u_int64_t *xidp,
1945 int *status)
1946 {
1947 struct nfsreq rq, *req = &rq;
1948 int error;
1949
1950 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
1951 return (error);
1952 req->r_flags |= (flags & R_OPTMASK);
1953
1954 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
1955 do {
1956 req->r_error = 0;
1957 req->r_flags &= ~R_RESTART;
1958 if ((error = nfs_request_add_header(req)))
1959 break;
1960 if (xidp)
1961 *xidp = req->r_xid;
1962 if ((error = nfs_request_send(req, 1)))
1963 break;
1964 nfs_request_wait(req);
1965 if ((error = nfs_request_finish(req, nmrepp, status)))
1966 break;
1967 } while (req->r_flags & R_RESTART);
1968
1969 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
1970 nfs_request_rele(req);
1971 return (error);
1972 }
1973
1974 /*
1975 * Create and start an asynchronous NFS request.
1976 */
1977 int
1978 nfs_request_async(
1979 nfsnode_t np,
1980 mount_t mp, /* used only if !np */
1981 struct nfsm_chain *nmrest,
1982 int procnum,
1983 thread_t thd,
1984 kauth_cred_t cred,
1985 struct nfsreq_cbinfo *cb,
1986 struct nfsreq **reqp)
1987 {
1988 struct nfsreq *req;
1989 int error, sent;
1990
1991 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
1992 req = *reqp;
1993 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
1994 if (error)
1995 return (error);
1996 req->r_flags |= R_ASYNC;
1997 if (cb)
1998 req->r_callback = *cb;
1999 error = nfs_request_add_header(req);
2000 if (!error) {
2001 req->r_flags |= R_WAITSENT;
2002 if (req->r_callback.rcb_func)
2003 nfs_request_ref(req, 0);
2004 error = nfs_request_send(req, 1);
2005 lck_mtx_lock(&req->r_mtx);
2006 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
2007 /* make sure to wait until this async I/O request gets sent */
2008 int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
2009 struct timespec ts = { 2, 0 };
2010 while (!(req->r_flags & R_SENT)) {
2011 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2012 break;
2013 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
2014 slpflag = 0;
2015 }
2016 }
2017 sent = req->r_flags & R_SENT;
2018 lck_mtx_unlock(&req->r_mtx);
2019 if (error && req->r_callback.rcb_func && !sent)
2020 nfs_request_rele(req);
2021 }
2022 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
2023 if (error || req->r_callback.rcb_func)
2024 nfs_request_rele(req);
2025 return (error);
2026 }
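
/*
 * Hedged usage sketch (not part of the original source): an async caller
 * supplies a callback in "cb"; when the reply (or an error) arrives, the
 * callback runs and completes the RPC via nfs_request_async_finish()
 * (hypothetical callback, example only):
 */
#if 0	/* example only */
static void
example_async_callback(struct nfsreq *req)
{
	struct nfsm_chain nmrep;
	u_int64_t xid;
	int error, status = 0;

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS)
		return;	/* restarted/resent; the callback will fire again */
	/* ... dissect nmrep and complete the async I/O ... */
}
#endif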
2027
2028 /*
2029 * Wait for and finish an asynchronous NFS request.
2030 */
2031 int
2032 nfs_request_async_finish(
2033 struct nfsreq *req,
2034 struct nfsm_chain *nmrepp,
2035 u_int64_t *xidp,
2036 int *status)
2037 {
2038 int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
2039
2040 lck_mtx_lock(&req->r_mtx);
2041 if (!asyncio)
2042 req->r_flags |= R_ASYNCWAIT;
2043 while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */
2044 struct timespec ts = { 2, 0 };
2045 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2046 break;
2047 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
2048 }
2049 lck_mtx_unlock(&req->r_mtx);
2050
2051 if (!error) {
2052 nfs_request_wait(req);
2053 error = nfs_request_finish(req, nmrepp, status);
2054 }
2055
2056 while (!error && (req->r_flags & R_RESTART)) {
2057 if (asyncio && req->r_resendtime) { /* send later */
2058 lck_mtx_lock(&req->r_mtx);
2059 nfs_asyncio_resend(req);
2060 lck_mtx_unlock(&req->r_mtx);
2061 return (EINPROGRESS);
2062 }
2063 req->r_error = 0;
2064 req->r_flags &= ~R_RESTART;
2065 if ((error = nfs_request_add_header(req)))
2066 break;
2067 if ((error = nfs_request_send(req, !asyncio)))
2068 break;
2069 if (asyncio)
2070 return (EINPROGRESS);
2071 nfs_request_wait(req);
2072 if ((error = nfs_request_finish(req, nmrepp, status)))
2073 break;
2074 }
2075 if (xidp)
2076 *xidp = req->r_xid;
2077
2078 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
2079 nfs_request_rele(req);
2080 return (error);
2081 }
2082
2083 /*
2084 * Cancel a pending asynchronous NFS request.
2085 */
2086 void
2087 nfs_request_async_cancel(struct nfsreq *req)
2088 {
2089 nfs_reqdequeue(req);
2090 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
2091 nfs_request_rele(req);
2092 }
2093
2094 /*
2095 * Flag a request as being terminated.
2096 */
2097 static void
2098 nfs_softterm(struct nfsreq *req)
2099 {
2100 struct nfsmount *nmp = req->r_nmp;
2101 req->r_flags |= R_SOFTTERM;
2102 req->r_error = ETIMEDOUT;
2103 if (!(req->r_flags & R_CWND) || !nmp)
2104 return;
2105 /* update congestion window */
2106 req->r_flags &= ~R_CWND;
2107 lck_mtx_lock(&nmp->nm_lock);
2108 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2109 nmp->nm_sent -= NFS_CWNDSCALE;
2110 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2111 /* congestion window is open, poke the cwnd queue */
2112 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2113 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2114 req2->r_cchain.tqe_next = NFSREQNOLIST;
2115 wakeup(req2);
2116 }
2117 lck_mtx_unlock(&nmp->nm_lock);
2118 }
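
/*
 * Hedged note (not part of the original source): nm_sent and nm_cwnd are
 * kept in NFS_CWNDSCALE units, so the decrement above releases exactly one
 * request's worth of the congestion window; if that reopens the window
 * while other requests are parked on nm_cwndq, the head of the queue is
 * woken so it can send.
 */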
2119
2120 /*
2121 * Ensure req isn't in use by the timer, then dequeue it.
2122 */
2123 static void
2124 nfs_reqdequeue(struct nfsreq *req)
2125 {
2126 lck_mtx_lock(nfs_request_mutex);
2127 while (req->r_lflags & RL_BUSY) {
2128 req->r_lflags |= RL_WAITING;
2129 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
2130 }
2131 if (req->r_lflags & RL_QUEUED) {
2132 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
2133 req->r_lflags &= ~RL_QUEUED;
2134 }
2135 lck_mtx_unlock(nfs_request_mutex);
2136 }
2137
2138 /*
2139 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
2140 * free()'d out from under it.
2141 */
2142 static void
2143 nfs_reqbusy(struct nfsreq *req)
2144 {
2145 if (req->r_lflags & RL_BUSY)
2146 panic("req locked");
2147 req->r_lflags |= RL_BUSY;
2148 }
2149
2150 /*
2151 * Unbusy the given nfsreq and return the next nfsreq in the chain, busied.
2152 */
2153 static struct nfsreq *
2154 nfs_reqnext(struct nfsreq *req)
2155 {
2156 struct nfsreq * nextreq;
2157
2158 if (req == NULL)
2159 return (NULL);
2160 /*
2161 * We need to get and busy the next req before signalling the
2162 * current one, otherwise wakeup() may block us and we'll race to
2163 * grab the next req.
2164 */
2165 nextreq = TAILQ_NEXT(req, r_chain);
2166 if (nextreq != NULL)
2167 nfs_reqbusy(nextreq);
2168 /* unbusy and signal. */
2169 req->r_lflags &= ~RL_BUSY;
2170 if (req->r_lflags & RL_WAITING) {
2171 req->r_lflags &= ~RL_WAITING;
2172 wakeup(&req->r_lflags);
2173 }
2174 return (nextreq);
2175 }
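
/*
 * Hedged sketch (not part of the original source): nfs_reqbusy() and
 * nfs_reqnext() give the timer a hand-over-hand walk of nfs_reqq; the next
 * request is busied before the current one is released, so no entry can be
 * freed out from under the walker (hypothetical helper, example only):
 */
#if 0	/* example only */
static void
example_reqq_walk(void)
{
	struct nfsreq *req;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req != NULL)
		nfs_reqbusy(req);
	for (; req != NULL; req = nfs_reqnext(req)) {
		/* req is busied here and cannot be freed */
	}
	lck_mtx_unlock(nfs_request_mutex);
}
#endif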
2176
2177 /*
2178 * NFS request queue timer routine
2179 *
2180 * Scan the NFS request queue for any requests that have timed out.
2181 *
2182 * Alert the system of unresponsive servers.
2183 * Mark expired requests on soft mounts as terminated.
2184 * For UDP, mark/signal requests for retransmission.
2185 */
2186 void
2187 nfs_request_timer(__unused void *param0, __unused void *param1)
2188 {
2189 struct nfsreq *req;
2190 struct nfsmount *nmp;
2191 int timeo, maxtime, finish_asyncio, error;
2192 struct timeval now;
2193 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
2194
2195 lck_mtx_lock(nfs_request_mutex);
2196 req = TAILQ_FIRST(&nfs_reqq);
2197 if (req == NULL) { /* no requests - turn timer off */
2198 nfs_request_timer_on = 0;
2199 lck_mtx_unlock(nfs_request_mutex);
2200 return;
2201 }
2202
2203 nfs_reqbusy(req);
2204 TAILQ_INIT(&nfs_mount_poke_queue);
2205
2206 microuptime(&now);
2207 for ( ; req != NULL ; req = nfs_reqnext(req)) {
2208 nmp = req->r_nmp;
2209 if (!nmp) /* unmounted */
2210 continue;
2211 if (req->r_error || req->r_nmrep.nmc_mhead)
2212 continue;
2213 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
2214 if (req->r_callback.rcb_func != NULL) {
2215 /* async I/O RPC needs to be finished */
2216 lck_mtx_lock(&req->r_mtx);
2217 req->r_error = error;
2218 finish_asyncio = !(req->r_flags & R_WAITSENT);
2219 wakeup(req);
2220 lck_mtx_unlock(&req->r_mtx);
2221 if (finish_asyncio)
2222 nfs_asyncio_finish(req);
2223 }
2224 continue;
2225 }
2226
2227 lck_mtx_lock(&req->r_mtx);
2228
2229 if (nmp->nm_tprintf_initial_delay &&
2230 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
2231 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
2232 req->r_lastmsg = now.tv_sec;
2233 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2234 "not responding");
2235 req->r_flags |= R_TPRINTFMSG;
2236 lck_mtx_lock(&nmp->nm_lock);
2237 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2238 lck_mtx_unlock(&nmp->nm_lock);
2239 /* we're not yet completely mounted and */
2240 /* we can't complete an RPC, so we fail */
2241 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2242 nfs_softterm(req);
2243 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2244 wakeup(req);
2245 lck_mtx_unlock(&req->r_mtx);
2246 if (finish_asyncio)
2247 nfs_asyncio_finish(req);
2248 continue;
2249 }
2250 lck_mtx_unlock(&nmp->nm_lock);
2251 }
2252
2253 /*
2254 * Put a reasonable limit on the maximum timeout,
2255 * and reduce that limit when soft mounts get timeouts or are in reconnect.
2256 */
2257 if (!(nmp->nm_flag & NFSMNT_SOFT))
2258 maxtime = NFS_MAXTIMEO;
2259 else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
2260 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
2261 else
2262 maxtime = NFS_MINTIMEO/4;
2263
2264 /*
2265 * Check for request timeout.
2266 */
2267 if (req->r_rtt >= 0) {
2268 req->r_rtt++;
2269 lck_mtx_lock(&nmp->nm_lock);
2270 if (req->r_flags & R_RESENDERR) {
2271 /* with resend errors, retry every few seconds */
2272 timeo = 4*hz;
2273 } else {
2274 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
2275 timeo = NFS_MINIDEMTIMEO; // gss context setup
2276 else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2277 timeo = nmp->nm_timeo;
2278 else
2279 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
2280
2281 /* ensure 62.5 ms floor */
2282 while (16 * timeo < hz)
2283 timeo *= 2;
2284 if (nmp->nm_timeouts > 0)
2285 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2286 }
2287 /* limit timeout to max */
2288 if (timeo > maxtime)
2289 timeo = maxtime;
2290 if (req->r_rtt <= timeo) {
2291 lck_mtx_unlock(&nmp->nm_lock);
2292 lck_mtx_unlock(&req->r_mtx);
2293 continue;
2294 }
2295 /* The request has timed out */
2296 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
2297 req->r_procnum, proct[req->r_procnum],
2298 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
2299 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
2300 if (nmp->nm_timeouts < 8)
2301 nmp->nm_timeouts++;
2302 /* if it's been a few seconds, try poking the socket */
2303 if ((nmp->nm_sotype == SOCK_STREAM) &&
2304 ((now.tv_sec - req->r_start) >= 3) &&
2305 !(nmp->nm_sockflags & NMSOCK_POKE)) {
2306 nmp->nm_sockflags |= NMSOCK_POKE;
2307 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
2308 }
2309 lck_mtx_unlock(&nmp->nm_lock);
2310 }
2311
2312 /* For soft mounts (& SETUPs), check for too many retransmits or too long a timeout. */
2313 if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
2314 ((req->r_rexmit >= req->r_retry) || /* too many */
2315 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
2316 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2317 lck_mtx_lock(&nmp->nm_lock);
2318 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
2319 lck_mtx_unlock(&nmp->nm_lock);
2320 /* make sure we note the unresponsive server */
2321 /* (maxtime may be less than tprintf delay) */
2322 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2323 "not responding");
2324 req->r_lastmsg = now.tv_sec;
2325 req->r_flags |= R_TPRINTFMSG;
2326 } else {
2327 lck_mtx_unlock(&nmp->nm_lock);
2328 }
2329 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
2330 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
2331 now.tv_sec - req->r_start));
2332 nfs_softterm(req);
2333 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2334 wakeup(req);
2335 lck_mtx_unlock(&req->r_mtx);
2336 if (finish_asyncio)
2337 nfs_asyncio_finish(req);
2338 continue;
2339 }
2340
2341 /* for TCP, only resend if explicitly requested */
2342 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
2343 if (++req->r_rexmit > NFS_MAXREXMIT)
2344 req->r_rexmit = NFS_MAXREXMIT;
2345 req->r_rtt = 0;
2346 lck_mtx_unlock(&req->r_mtx);
2347 continue;
2348 }
2349
2350 /*
2351 * The request needs to be (re)sent. Kick the requester to resend it.
2352 * (unless it's already marked as needing a resend)
2353 */
2354 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
2355 lck_mtx_unlock(&req->r_mtx);
2356 continue;
2357 }
2358 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
2359 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
2360 req->r_flags |= R_MUSTRESEND;
2361 req->r_rtt = -1;
2362 wakeup(req);
2363 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
2364 nfs_asyncio_resend(req);
2365 lck_mtx_unlock(&req->r_mtx);
2366 }
2367
2368 lck_mtx_unlock(nfs_request_mutex);
2369
2370 /* poke any sockets */
2371 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
2372 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
2373 nfs_sock_poke(nmp);
2374 lck_mtx_lock(&nmp->nm_lock);
2375 nmp->nm_sockflags &= ~NMSOCK_POKE;
2376 if (!(nmp->nm_state & NFSSTA_MOUNTED))
2377 wakeup(&nmp->nm_sockflags);
2378 lck_mtx_unlock(&nmp->nm_lock);
2379 }
2380
2381 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
2382 }
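
/*
 * Hedged sketch (not part of the original source): the retransmit timeout
 * chosen above starts from the smoothed RTO (or nm_timeo), is floored at
 * hz/16 ticks (62.5 ms), scaled by the shared backoff table as timeouts
 * accumulate, and clamped to maxtime (hypothetical helper, example only):
 */
#if 0	/* example only */
static int
example_req_timeo(struct nfsmount *nmp, struct nfsreq *req, int maxtime)
{
	int timeo = NFS_RTO(nmp, proct[req->r_procnum]);

	while (16 * timeo < hz)			/* 62.5 ms floor */
		timeo *= 2;
	if (nmp->nm_timeouts > 0)		/* back off on repeat timeouts */
		timeo *= nfs_backoff[nmp->nm_timeouts - 1];
	return ((timeo > maxtime) ? maxtime : timeo);
}
#endif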
2383
2384 /*
2385 * Check a thread's proc for the "noremotehang" flag.
2386 */
2387 int
2388 nfs_noremotehang(thread_t thd)
2389 {
2390 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
2391 return (p && proc_noremotehang(p));
2392 }
2393
2394 /*
2395 * Test for a termination condition pending on the process.
2396 * This is used to determine if we need to bail on a mount.
2397 * ETIMEDOUT is returned if there has been a soft timeout.
2398 * EINTR is returned if there is a signal pending that is not being ignored
2399 * and the mount is interruptible, or if we are a thread that is in the process
2400 * of being cancelled (i.e., SIGKILL has been posted).
2401 */
2402 int
2403 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
2404 {
2405 int error = 0;
2406
2407 if (nmp == NULL)
2408 return (ENXIO);
2409
2410 if (req && (req->r_flags & R_SOFTTERM))
2411 return (ETIMEDOUT); /* request has been terminated. */
2412
2413 /*
2414 * If we're in the process of a force unmount and there's
2415 * been a timeout, we're dead, so fail the I/O.
2416 */
2417 if (!nmplocked)
2418 lck_mtx_lock(&nmp->nm_lock);
2419 if ((nmp->nm_state & NFSSTA_FORCE) &&
2420 (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
2421 error = EIO;
2422 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
2423 /* Someone is unmounting us, go soft and mark it. */
2424 nmp->nm_flag |= NFSMNT_SOFT;
2425 nmp->nm_state |= NFSSTA_FORCE;
2426 }
2427
2428 /*
2429 * If the mount is hung and we've requested not to hang
2430 * on remote filesystems, then bail now.
2431 */
2432 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
2433 error = EIO;
2434
2435 if (!nmplocked)
2436 lck_mtx_unlock(&nmp->nm_lock);
2437 if (error)
2438 return (error);
2439
2440 /* may not have a thread for async I/O */
2441 if (thd == NULL)
2442 return (0);
2443
2444 /* If this thread belongs to the kernel task, the abort check is not needed */
2445 if ((current_proc() != kernproc) && current_thread_aborted())
2446 return (EINTR);
2447
2448 /* check for pending signals, with thread- and process-blocked signals masked off */
2449 if ((nmp->nm_flag & NFSMNT_INT) &&
2450 proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
2451 return (EINTR);
2452 return (0);
2453 }
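
/*
 * Hedged usage note (not part of the original source): long client-side
 * waits typically recheck nfs_sigintr() each time around their sleep loop
 * so that a soft timeout or pending signal terminates the wait
 * (hypothetical helper, example only):
 */
#if 0	/* example only */
static int
example_interruptible_wait(struct nfsmount *nmp, struct nfsreq *req,
	void *chan, lck_mtx_t *mtxp, int *donep)
{
	struct timespec ts = { 2, 0 };
	int error = 0;

	while (!*donep) {
		/* returns ETIMEDOUT, EINTR, EIO, or ENXIO when we should bail */
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		msleep(chan, mtxp, PZERO - 1, "nfsexample", &ts);
	}
	return (error);
}
#endif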
2454
2455 /*
2456 * Lock a socket against others.
2457 * Necessary for STREAM sockets to ensure you get an entire RPC request/reply
2458 * and also to avoid race conditions between the processes with nfs requests
2459 * in progress when a reconnect is necessary.
2460 */
2461 int
2462 nfs_sndlock(struct nfsreq *req)
2463 {
2464 struct nfsmount *nmp = req->r_nmp;
2465 int *statep;
2466 int error = 0, slpflag = 0;
2467 struct timespec ts = { 0, 0 };
2468
2469 if (nmp == NULL)
2470 return (ENXIO);
2471
2472 lck_mtx_lock(&nmp->nm_lock);
2473 statep = &nmp->nm_state;
2474
2475 if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
2476 slpflag = PCATCH;
2477 while (*statep & NFSSTA_SNDLOCK) {
2478 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
2479 break;
2480 *statep |= NFSSTA_WANTSND;
2481 if (nfs_noremotehang(req->r_thread))
2482 ts.tv_sec = 1;
2483 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
2484 if (slpflag == PCATCH) {
2485 slpflag = 0;
2486 ts.tv_sec = 2;
2487 }
2488 }
2489 if (!error)
2490 *statep |= NFSSTA_SNDLOCK;
2491 lck_mtx_unlock(&nmp->nm_lock);
2492 return (error);
2493 }
2494
2495 /*
2496 * Unlock the stream socket for others.
2497 */
2498 void
2499 nfs_sndunlock(struct nfsreq *req)
2500 {
2501 struct nfsmount *nmp = req->r_nmp;
2502 int *statep, wake = 0;
2503
2504 if (nmp == NULL)
2505 return;
2506 lck_mtx_lock(&nmp->nm_lock);
2507 statep = &nmp->nm_state;
2508 if ((*statep & NFSSTA_SNDLOCK) == 0)
2509 panic("nfs sndunlock");
2510 *statep &= ~NFSSTA_SNDLOCK;
2511 if (*statep & NFSSTA_WANTSND) {
2512 *statep &= ~NFSSTA_WANTSND;
2513 wake = 1;
2514 }
2515 lck_mtx_unlock(&nmp->nm_lock);
2516 if (wake)
2517 wakeup(statep);
2518 }
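
/*
 * Hedged usage note (not part of the original source): senders bracket
 * their socket writes with this lock pair so one RPC record can't be
 * interleaved with another sender's (hypothetical helper, example only):
 */
#if 0	/* example only */
static int
example_locked_send(struct nfsreq *req)
{
	int error;

	if ((error = nfs_sndlock(req)))
		return (error);
	/* ... transmit the complete RPC record on the mount's socket ... */
	nfs_sndunlock(req);
	return (0);
}
#endif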
2519
2520 #endif /* NFSCLIENT */
2521
2522 #if NFSSERVER
2523
2524 /*
2525 * Generate the RPC reply header.
2526 * The siz arg is used to decide if adding a cluster is worthwhile.
2527 */
2528 int
2529 nfsrv_rephead(
2530 struct nfsrv_descript *nd,
2531 __unused struct nfsrv_sock *slp,
2532 struct nfsm_chain *nmrepp,
2533 size_t siz)
2534 {
2535 mbuf_t mrep;
2536 u_long *tl;
2537 struct nfsm_chain nmrep;
2538 int err, error;
2539
2540 err = nd->nd_repstat;
2541 if (err && (nd->nd_vers == NFS_VER2))
2542 siz = 0;
2543
2544 /*
2545 * If this is a big reply, use a cluster; otherwise
2546 * try to leave leading space for the lower-level headers.
2547 */
2548 siz += RPC_REPLYSIZ;
2549 if (siz >= nfs_mbuf_minclsize) {
2550 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
2551 } else {
2552 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
2553 }
2554 if (error) {
2555 /* unable to allocate packet */
2556 /* XXX should we keep statistics for these errors? */
2557 return (error);
2558 }
2559 if (siz < nfs_mbuf_minclsize) {
2560 /* leave space for lower level headers */
2561 tl = mbuf_data(mrep);
2562 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
2563 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
2564 }
2565 nfsm_chain_init(&nmrep, mrep);
2566 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
2567 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2568 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
2569 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2570 if (err & NFSERR_AUTHERR) {
2571 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2572 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
2573 } else {
2574 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2575 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2576 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2577 }
2578 } else {
2579 /* reply status */
2580 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2581 if (nd->nd_gss_context != NULL) {
2582 /* RPCSEC_GSS verifier */
2583 error = nfs_gss_svc_verf_put(nd, &nmrep);
2584 if (error) {
2585 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
2586 goto done;
2587 }
2588 } else {
2589 /* RPCAUTH_NULL verifier */
2590 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2591 nfsm_chain_add_32(error, &nmrep, 0);
2592 }
2593 /* accepted status */
2594 switch (err) {
2595 case EPROGUNAVAIL:
2596 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2597 break;
2598 case EPROGMISMATCH:
2599 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2600 /* XXX hard coded versions? */
2601 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
2602 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
2603 break;
2604 case EPROCUNAVAIL:
2605 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2606 break;
2607 case EBADRPC:
2608 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2609 break;
2610 default:
2611 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2612 if (nd->nd_gss_context != NULL)
2613 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
2614 if (err != NFSERR_RETVOID)
2615 nfsm_chain_add_32(error, &nmrep,
2616 (err ? nfsrv_errmap(nd, err) : 0));
2617 break;
2618 }
2619 }
2620
2621 done:
2622 nfsm_chain_build_done(error, &nmrep);
2623 if (error) {
2624 /* error composing reply header */
2625 /* XXX should we keep statistics for these errors? */
2626 mbuf_freem(mrep);
2627 return (error);
2628 }
2629
2630 *nmrepp = nmrep;
2631 if ((err != 0) && (err != NFSERR_RETVOID))
2632 OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
2633 return (0);
2634 }
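
/*
 * Hedged reference (not part of the original source): the reply assembled
 * above follows the standard RPC reply layout (RFC 1831):
 *
 *	uint32	xid			(nd_retxid)
 *	uint32	RPC_REPLY
 *	uint32	RPC_MSGACCEPTED or RPC_MSGDENIED
 *	accepted:  verifier (flavor, length[, body]), then the accepted
 *		   status (plus a low/high version pair for RPC_PROGMISMATCH)
 *	denied:    RPC_AUTHERR plus the auth error, or RPC_MISMATCH plus
 *		   the low/high RPC versions
 */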
2635
2636 /*
2637 * The nfs server send routine.
2638 *
2639 * - return EINTR or ERESTART if interrupted by a signal
2640 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
2641 * - do any cleanup required by recoverable socket errors (???)
2642 */
2643 int
2644 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
2645 {
2646 int error;
2647 socket_t so = slp->ns_so;
2648 struct sockaddr *sendnam;
2649 struct msghdr msg;
2650
2651 bzero(&msg, sizeof(msg));
2652 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
2653 if ((sendnam = mbuf_data(nam))) {
2654 msg.msg_name = (caddr_t)sendnam;
2655 msg.msg_namelen = sendnam->sa_len;
2656 }
2657 }
2658 error = sock_sendmbuf(so, &msg, top, 0, NULL);
2659 if (!error)
2660 return (0);
2661 log(LOG_INFO, "nfsd send error %d\n", error);
2662
2663 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
2664 error = EPIPE; /* zap TCP sockets if they time out on send */
2665
2666 /* Handle any recoverable (soft) socket errors here. (???) */
2667 if (error != EINTR && error != ERESTART && error != EIO &&
2668 error != EWOULDBLOCK && error != EPIPE)
2669 error = 0;
2670
2671 return (error);
2672 }
2673
2674 /*
2675 * Socket upcall routine for the nfsd sockets.
2676 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
2677 * Essentially, do as much as possible without blocking; otherwise punt,
2678 * and it will be called with MBUF_WAITOK from an nfsd.
2679 */
2680 void
2681 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2682 {
2683 struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;
2684
2685 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
2686 return;
2687
2688 lck_rw_lock_exclusive(&slp->ns_rwlock);
2689 nfsrv_rcv_locked(so, slp, waitflag);
2690 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2691 }
2692 void
2693 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
2694 {
2695 mbuf_t m, mp, mhck, m2;
2696 int ns_flag=0, error;
2697 struct msghdr msg;
2698 size_t bytes_read;
2699
2700 if ((slp->ns_flag & SLP_VALID) == 0) {
2701 if (waitflag == MBUF_DONTWAIT)
2702 lck_rw_done(&slp->ns_rwlock);
2703 return;
2704 }
2705
2706 #ifdef notdef
2707 /*
2708 * Define this to test how the nfsds handle this case under heavy load.
2709 */
2710 if (waitflag == MBUF_DONTWAIT) {
2711 ns_flag = SLP_NEEDQ;
2712 goto dorecs;
2713 }
2714 #endif
2715 if (slp->ns_sotype == SOCK_STREAM) {
2716 /*
2717 * If there are already records on the queue, defer soreceive()
2718 * to an nfsd so that there is feedback to the TCP layer that
2719 * the nfs servers are heavily loaded.
2720 */
2721 if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
2722 ns_flag = SLP_NEEDQ;
2723 goto dorecs;
2724 }
2725
2726 /*
2727 * Do soreceive().
2728 */
2729 bytes_read = 1000000000;
2730 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
2731 if (error || mp == NULL) {
2732 if (error == EWOULDBLOCK)
2733 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
2734 else
2735 ns_flag = SLP_DISCONN;
2736 goto dorecs;
2737 }
2738 m = mp;
2739 if (slp->ns_rawend) {
2740 if ((error = mbuf_setnext(slp->ns_rawend, m)))
2741 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
2742 slp->ns_cc += bytes_read;
2743 } else {
2744 slp->ns_raw = m;
2745 slp->ns_cc = bytes_read;
2746 }
2747 while ((m2 = mbuf_next(m)))
2748 m = m2;
2749 slp->ns_rawend = m;
2750
2751 /*
2752 * Now try to parse record(s) out of the raw stream data.
2753 */
2754 error = nfsrv_getstream(slp, waitflag);
2755 if (error) {
2756 if (error == EPERM)
2757 ns_flag = SLP_DISCONN;
2758 else
2759 ns_flag = SLP_NEEDQ;
2760 }
2761 } else {
2762 struct sockaddr_storage nam;
2763
2764 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
2765 /* already have max # RPC records queued on this socket */
2766 ns_flag = SLP_NEEDQ;
2767 goto dorecs;
2768 }
2769
2770 bzero(&msg, sizeof(msg));
2771 msg.msg_name = (caddr_t)&nam;
2772 msg.msg_namelen = sizeof(nam);
2773
2774 do {
2775 bytes_read = 1000000000;
2776 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
2777 if (mp) {
2778 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
2779 mbuf_setlen(mhck, nam.ss_len);
2780 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
2781 m = mhck;
2782 if (mbuf_setnext(m, mp)) {
2783 /* trouble... just drop it */
2784 printf("nfsrv_rcv: mbuf_setnext failed\n");
2785 mbuf_free(mhck);
2786 m = mp;
2787 }
2788 } else {
2789 m = mp;
2790 }
2791 if (slp->ns_recend)
2792 mbuf_setnextpkt(slp->ns_recend, m);
2793 else {
2794 slp->ns_rec = m;
2795 slp->ns_flag |= SLP_DOREC;
2796 }
2797 slp->ns_recend = m;
2798 mbuf_setnextpkt(m, NULL);
2799 slp->ns_reccnt++;
2800 }
2801 } while (mp);
2802 }
2803
2804 /*
2805 * Now try to process the request records without blocking.
2806 */
2807 dorecs:
2808 if (ns_flag)
2809 slp->ns_flag |= ns_flag;
2810 if (waitflag == MBUF_DONTWAIT) {
2811 int wake = (slp->ns_flag & SLP_WORKTODO);
2812 lck_rw_done(&slp->ns_rwlock);
2813 if (wake && nfsd_thread_count) {
2814 lck_mtx_lock(nfsd_mutex);
2815 nfsrv_wakenfsd(slp);
2816 lck_mtx_unlock(nfsd_mutex);
2817 }
2818 }
2819 }
2820
2821 /*
2822 * Try to extract an RPC request from the mbuf data list received on a
2823 * stream socket. The "waitflag" argument indicates whether or not it
2824 * can sleep.
2825 */
2826 static int
2827 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
2828 {
2829 mbuf_t m;
2830 char *cp1, *cp2, *mdata;
2831 int len, mlen, error;
2832 mbuf_t om, m2, recm;
2833 u_long recmark;
2834
2835 if (slp->ns_flag & SLP_GETSTREAM)
2836 panic("nfs getstream");
2837 slp->ns_flag |= SLP_GETSTREAM;
2838 for (;;) {
2839 if (slp->ns_reclen == 0) {
2840 if (slp->ns_cc < NFSX_UNSIGNED) {
2841 slp->ns_flag &= ~SLP_GETSTREAM;
2842 return (0);
2843 }
2844 m = slp->ns_raw;
2845 mdata = mbuf_data(m);
2846 mlen = mbuf_len(m);
2847 if (mlen >= NFSX_UNSIGNED) {
2848 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
2849 mdata += NFSX_UNSIGNED;
2850 mlen -= NFSX_UNSIGNED;
2851 mbuf_setdata(m, mdata, mlen);
2852 } else {
2853 cp1 = (caddr_t)&recmark;
2854 cp2 = mdata;
2855 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2856 while (mlen == 0) {
2857 m = mbuf_next(m);
2858 cp2 = mbuf_data(m);
2859 mlen = mbuf_len(m);
2860 }
2861 *cp1++ = *cp2++;
2862 mlen--;
2863 mbuf_setdata(m, cp2, mlen);
2864 }
2865 }
2866 slp->ns_cc -= NFSX_UNSIGNED;
2867 recmark = ntohl(recmark);
2868 slp->ns_reclen = recmark & ~0x80000000;
2869 if (recmark & 0x80000000)
2870 slp->ns_flag |= SLP_LASTFRAG;
2871 else
2872 slp->ns_flag &= ~SLP_LASTFRAG;
2873 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2874 slp->ns_flag &= ~SLP_GETSTREAM;
2875 return (EPERM);
2876 }
2877 }
2878
2879 /*
2880 * Now get the record part.
2881 *
2882 * Note that slp->ns_reclen may be 0. Linux sometimes
2883 * generates 0-length RPCs.
2884 */
2885 recm = NULL;
2886 if (slp->ns_cc == slp->ns_reclen) {
2887 recm = slp->ns_raw;
2888 slp->ns_raw = slp->ns_rawend = NULL;
2889 slp->ns_cc = slp->ns_reclen = 0;
2890 } else if (slp->ns_cc > slp->ns_reclen) {
2891 len = 0;
2892 m = slp->ns_raw;
2893 mlen = mbuf_len(m);
2894 mdata = mbuf_data(m);
2895 om = NULL;
2896 while (len < slp->ns_reclen) {
2897 if ((len + mlen) > slp->ns_reclen) {
2898 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
2899 slp->ns_flag &= ~SLP_GETSTREAM;
2900 return (EWOULDBLOCK);
2901 }
2902 if (om) {
2903 if (mbuf_setnext(om, m2)) {
2904 /* trouble... just drop it */
2905 printf("nfsrv_getstream: mbuf_setnext failed\n");
2906 mbuf_freem(m2);
2907 slp->ns_flag &= ~SLP_GETSTREAM;
2908 return (EWOULDBLOCK);
2909 }
2910 recm = slp->ns_raw;
2911 } else {
2912 recm = m2;
2913 }
2914 mdata += slp->ns_reclen - len;
2915 mlen -= slp->ns_reclen - len;
2916 mbuf_setdata(m, mdata, mlen);
2917 len = slp->ns_reclen;
2918 } else if ((len + mlen) == slp->ns_reclen) {
2919 om = m;
2920 len += mlen;
2921 m = mbuf_next(m);
2922 recm = slp->ns_raw;
2923 if (mbuf_setnext(om, NULL)) {
2924 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
2925 slp->ns_flag &= ~SLP_GETSTREAM;
2926 return (EWOULDBLOCK);
2927 }
2928 mlen = mbuf_len(m);
2929 mdata = mbuf_data(m);
2930 } else {
2931 om = m;
2932 len += mlen;
2933 m = mbuf_next(m);
2934 mlen = mbuf_len(m);
2935 mdata = mbuf_data(m);
2936 }
2937 }
2938 slp->ns_raw = m;
2939 slp->ns_cc -= len;
2940 slp->ns_reclen = 0;
2941 } else {
2942 slp->ns_flag &= ~SLP_GETSTREAM;
2943 return (0);
2944 }
2945
2946 /*
2947 * Accumulate the fragments into a record.
2948 */
2949 if (slp->ns_frag == NULL) {
2950 slp->ns_frag = recm;
2951 } else {
2952 m = slp->ns_frag;
2953 while ((m2 = mbuf_next(m)))
2954 m = m2;
2955 if ((error = mbuf_setnext(m, recm)))
2956 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
2957 }
2958 if (slp->ns_flag & SLP_LASTFRAG) {
2959 if (slp->ns_recend)
2960 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
2961 else {
2962 slp->ns_rec = slp->ns_frag;
2963 slp->ns_flag |= SLP_DOREC;
2964 }
2965 slp->ns_recend = slp->ns_frag;
2966 slp->ns_frag = NULL;
2967 }
2968 }
2969 }
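
/*
 * Hedged sketch (not part of the original source): the record mark decoded
 * above is the standard RPC-over-TCP record marking (RFC 1831): a 4-byte
 * big-endian word whose high bit flags the last fragment and whose low 31
 * bits carry the fragment length (hypothetical helper, example only):
 */
#if 0	/* example only */
static void
example_decode_recmark(u_long wire, u_long *fraglen, int *lastfrag)
{
	u_long recmark = ntohl(wire);		/* wire to host byte order */

	*fraglen = recmark & ~0x80000000;	 /* low 31 bits: length */
	*lastfrag = (recmark & 0x80000000) != 0; /* high bit: last fragment */
}
#endif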
2970
2971 /*
2972 * Dequeue the next request record and parse its RPC header.
2973 */
2974 int
2975 nfsrv_dorec(
2976 struct nfsrv_sock *slp,
2977 struct nfsd *nfsd,
2978 struct nfsrv_descript **ndp)
2979 {
2980 mbuf_t m;
2981 mbuf_t nam;
2982 struct nfsrv_descript *nd;
2983 int error = 0;
2984
2985 *ndp = NULL;
2986 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
2987 return (ENOBUFS);
2988 MALLOC_ZONE(nd, struct nfsrv_descript *,
2989 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2990 if (!nd)
2991 return (ENOMEM);
2992 m = slp->ns_rec;
2993 slp->ns_rec = mbuf_nextpkt(m);
2994 if (slp->ns_rec)
2995 mbuf_setnextpkt(m, NULL);
2996 else {
2997 slp->ns_flag &= ~SLP_DOREC;
2998 slp->ns_recend = NULL;
2999 }
3000 slp->ns_reccnt--;
3001 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
3002 nam = m;
3003 m = mbuf_next(m);
3004 if ((error = mbuf_setnext(nam, NULL)))
3005 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
3006 } else
3007 nam = NULL;
3008 nd->nd_nam2 = nam;
3009 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
3010 if (!error)
3011 error = nfsrv_getreq(nd);
3012 if (error) {
3013 if (nam)
3014 mbuf_freem(nam);
3015 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
3016 return (error);
3017 }
3018 nd->nd_mrep = NULL;
3019 *ndp = nd;
3020 nfsd->nfsd_nd = nd;
3021 return (0);
3022 }
3023
3024 /*
3025 * Parse an RPC request
3026 * - verify it
3027 * - fill in the cred struct.
3028 */
3029 static int
3030 nfsrv_getreq(struct nfsrv_descript *nd)
3031 {
3032 struct nfsm_chain *nmreq;
3033 int len, i;
3034 u_long nfsvers, auth_type;
3035 int error = 0;
3036 uid_t user_id;
3037 gid_t group_id;
3038 int ngroups;
3039 struct ucred temp_cred;
3040 uint32_t val;
3041
3042 nd->nd_cr = NULL;
3043 nd->nd_gss_context = NULL;
3044 nd->nd_gss_seqnum = 0;
3045 nd->nd_gss_mb = NULL;
3046
3047 user_id = group_id = -2;
3048 val = auth_type = len = 0;
3049
3050 nmreq = &nd->nd_nmreq;
3051 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
3052 nfsm_chain_get_32(error, nmreq, val); // RPC Call
3053 if (!error && (val != RPC_CALL))
3054 error = EBADRPC;
3055 nfsmout_if(error);
3056 nd->nd_repstat = 0;
3057 nfsm_chain_get_32(error, nmreq, val); // RPC Version
3058 nfsmout_if(error);
3059 if (val != RPC_VER2) {
3060 nd->nd_repstat = ERPCMISMATCH;
3061 nd->nd_procnum = NFSPROC_NOOP;
3062 return (0);
3063 }
3064 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
3065 nfsmout_if(error);
3066 if (val != NFS_PROG) {
3067 nd->nd_repstat = EPROGUNAVAIL;
3068 nd->nd_procnum = NFSPROC_NOOP;
3069 return (0);
3070 }
3071 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
3072 nfsmout_if(error);
3073 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
3074 nd->nd_repstat = EPROGMISMATCH;
3075 nd->nd_procnum = NFSPROC_NOOP;
3076 return (0);
3077 }
3078 nd->nd_vers = nfsvers;
3079 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
3080 nfsmout_if(error);
3081 if ((nd->nd_procnum >= NFS_NPROCS) ||
3082 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
3083 nd->nd_repstat = EPROCUNAVAIL;
3084 nd->nd_procnum = NFSPROC_NOOP;
3085 return (0);
3086 }
3087 if (nfsvers != NFS_VER3)
3088 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
3089 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
3090 nfsm_chain_get_32(error, nmreq, len); // Auth Length
3091 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
3092 error = EBADRPC;
3093 nfsmout_if(error);
3094
3095 /* Handle authentication */
3096 if (auth_type == RPCAUTH_UNIX) {
3097 if (nd->nd_procnum == NFSPROC_NULL)
3098 return (0);
3099 nd->nd_sec = RPCAUTH_UNIX;
3100 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
3101 nfsm_chain_get_32(error, nmreq, len); // hostname length
3102 if (len < 0 || len > NFS_MAXNAMLEN)
3103 error = EBADRPC;
3104 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
3105 nfsmout_if(error);
3106
3107 /* create a temporary credential using the bits from the wire */
3108 bzero(&temp_cred, sizeof(temp_cred));
3109 nfsm_chain_get_32(error, nmreq, user_id);
3110 nfsm_chain_get_32(error, nmreq, group_id);
3111 temp_cred.cr_groups[0] = group_id;
3112 nfsm_chain_get_32(error, nmreq, len); // extra GID count
3113 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
3114 error = EBADRPC;
3115 nfsmout_if(error);
3116 for (i = 1; i <= len; i++)
3117 if (i < NGROUPS)
3118 nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
3119 else
3120 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
3121 nfsmout_if(error);
3122 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
3123 if (ngroups > 1)
3124 nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
3125 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
3126 nfsm_chain_get_32(error, nmreq, len); // verifier length
3127 if (len < 0 || len > RPCAUTH_MAXSIZ)
3128 error = EBADRPC;
3129 if (len > 0)
3130 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
3131
3132 /* request creation of a real credential */
3133 temp_cred.cr_uid = user_id;
3134 temp_cred.cr_ngroups = ngroups;
3135 nd->nd_cr = kauth_cred_create(&temp_cred);
3136 if (nd->nd_cr == NULL) {
3137 nd->nd_repstat = ENOMEM;
3138 nd->nd_procnum = NFSPROC_NOOP;
3139 return (0);
3140 }
3141 } else if (auth_type == RPCSEC_GSS) {
3142 error = nfs_gss_svc_cred_get(nd, nmreq);
3143 if (error) {
3144 if (error == EINVAL)
3145 goto nfsmout; // drop the request
3146 nd->nd_repstat = error;
3147 nd->nd_procnum = NFSPROC_NOOP;
3148 return (0);
3149 }
3150 } else {
3151 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
3152 return (0);
3153 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
3154 nd->nd_procnum = NFSPROC_NOOP;
3155 return (0);
3156 }
3157 return (0);
3158 nfsmout:
3159 if (IS_VALID_CRED(nd->nd_cr))
3160 kauth_cred_unref(&nd->nd_cr);
3161 nfsm_chain_cleanup(nmreq);
3162 return (error);
3163 }
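
/*
 * Hedged reference (not part of the original source): the RPCAUTH_UNIX
 * credential body parsed above is laid out on the wire as:
 *
 *	uint32	stamp			(skipped)
 *	uint32	hostname length		(at most NFS_MAXNAMLEN)
 *	opaque	hostname		(padded to 4 bytes, skipped)
 *	uint32	uid
 *	uint32	gid
 *	uint32	extra gid count		(at most RPCAUTH_UNIXGIDS)
 *	uint32	extra gids[count]
 *
 * followed by the verifier (flavor and length, expected to be AUTH_NONE
 * for these calls).
 */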
3164
3165 /*
3166 * Search for a sleeping nfsd and wake it up.
3167 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
3168 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
3169 * Note: Must be called with nfsd_mutex held.
3170 */
3171 void
3172 nfsrv_wakenfsd(struct nfsrv_sock *slp)
3173 {
3174 struct nfsd *nd;
3175
3176 if ((slp->ns_flag & SLP_VALID) == 0)
3177 return;
3178
3179 lck_rw_lock_exclusive(&slp->ns_rwlock);
3180 /* if there's work to do on this socket, make sure it's queued up */
3181 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
3182 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
3183 slp->ns_flag |= SLP_WAITQ;
3184 }
3185 lck_rw_done(&slp->ns_rwlock);
3186
3187 /* wake up a waiting nfsd, if possible */
3188 nd = TAILQ_FIRST(&nfsd_queue);
3189 if (!nd)
3190 return;
3191
3192 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
3193 nd->nfsd_flag &= ~NFSD_WAITING;
3194 wakeup(nd);
3195 }
3196
3197 #endif /* NFSSERVER */
3198
3199 static int
3200 nfs_msg(thread_t thd,
3201 const char *server,
3202 const char *msg,
3203 int error)
3204 {
3205 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
3206 tpr_t tpr;
3207
3208 if (p)
3209 tpr = tprintf_open(p);
3210 else
3211 tpr = NULL;
3212 if (error)
3213 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
3214 else
3215 tprintf(tpr, "nfs server %s: %s\n", server, msg);
3216 tprintf_close(tpr);
3217 return (0);
3218 }
3219
3220 void
3221 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
3222 {
3223 int ostate, do_vfs_signal;
3224
3225 if (nmp == NULL)
3226 return;
3227
3228 lck_mtx_lock(&nmp->nm_lock);
3229 ostate = nmp->nm_state;
3230 if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
3231 nmp->nm_state |= NFSSTA_TIMEO;
3232 if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
3233 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3234 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
3235 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
3236 lck_mtx_unlock(&nmp->nm_lock);
3237
3238 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
3239 if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
3240 do_vfs_signal = 0;
3241 else
3242 do_vfs_signal = !(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
3243 if (do_vfs_signal)
3244 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
3245
3246 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
3247 }
3248
3249 void
3250 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
3251 {
3252 int ostate, state, do_vfs_signal;
3253
3254 if (nmp == NULL)
3255 return;
3256
3257 if (msg)
3258 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
3259
3260 lck_mtx_lock(&nmp->nm_lock);
3261 ostate = nmp->nm_state;
3262 if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
3263 nmp->nm_state &= ~NFSSTA_TIMEO;
3264 if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
3265 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3266 if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
3267 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
3268 state = nmp->nm_state;
3269 lck_mtx_unlock(&nmp->nm_lock);
3270
3271 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
3272 if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
3273 do_vfs_signal = 0;
3274 else
3275 do_vfs_signal = (ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
3276 !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
3277 if (do_vfs_signal)
3278 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
3279 }
3280