/* apple/xnu (xnu-1228.3.13): bsd/nfs/nfs_socket.c */
/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

#if NFSCLIENT

static int nfs_connect_setup(struct nfsmount *);
static void nfs_reqdequeue(struct nfsreq *);
static void nfs_udp_rcv(socket_t, void*, int);
static void nfs_tcp_rcv(socket_t, void*, int);
static void nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that the timer estimate would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
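/*
 * Worked example (added, illustrative only): nm_srtt[] is kept scaled by 8
 * and nm_sdrtt[] by 4 (see the initialization in nfs_connect() and the
 * update in nfs_request_match_reply() below), so for a mean rtt A and mean
 * deviation D we have nm_srtt[t-1] ~ 8A and nm_sdrtt[t-1] ~ 4D.  For
 * getattr/lookup (t == 1 or 2) the macro yields roughly
 * ((8A >> 2) + 4D) >> 1 = A + 2D, and for read/write (t == 3 or 4) roughly
 * (8A >> 3) + 4D = A + 4D, matching the table above.
 */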

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
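/*
 * Scaling example (added, illustrative only): NFS_CWNDSCALE 256 means one
 * outstanding RPC "slot" counts as 256, so NFS_MAXCWND allows 32 RPCs in
 * flight.  With nm_cwnd == 4096 (16 slots), each reply grows the window by
 * about ((256 * 256) + 2048) / 4096 ~ 16, i.e. 1/16 of a slot per reply,
 * while a retransmit timeout halves nm_cwnd (clamped at one slot).
 */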

/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
			nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15)	/* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			if (tocnt > 60) {
				/* abort if this is taking too long */
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}

	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}
	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		nfs_disconnect(nmp);
	return (error);
}

/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}

/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect.  Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
static void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		} else if (force)
			break;
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}

/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}

/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
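	/*
	 * Added commentary: a partial send on a stream socket leaves the
	 * peer mid-record with no way to resynchronize the RPC record
	 * marking, so the connection has to be torn down and rebuilt.
	 */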
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
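	/*
	 * Added commentary: record marking per RFC 1831 precedes each
	 * fragment with a 4-byte big-endian marker whose high bit flags the
	 * last fragment of a record and whose low 31 bits give the fragment
	 * length.  E.g. (illustrative) a marker of 0x80000064 announces a
	 * final fragment of 100 bytes; that is the 0x80000000 test below.
	 */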
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}

/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
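		/*
		 * Added commentary: the update above is Jacobson's EWMA
		 * filter with srtt scaled by 8 and sdrtt scaled by 4, i.e.
		 * srtt += (rtt - srtt/8) and sdrtt += (|err| - sdrtt/4),
		 * which is what lets NFS_RTO() recover the A+2D / A+4D
		 * estimates by simple shifts.
		 */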
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
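
/*
 * Illustrative sketch (added): this mirrors how nfs_request2() below
 * drives the sequence for a synchronous request; error handling is elided
 * and the loop condition follows the R_RESTART convention used throughout
 * this file.
 *
 *	struct nfsreq rq, *req = &rq;
 *	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req);
 *	do {
 *		req->r_error = 0;
 *		req->r_flags &= ~R_RESTART;
 *		error = nfs_request_add_header(req);
 *		error = nfs_request_send(req, 1);
 *		nfs_request_wait(req);
 *		error = nfs_request_finish(req, nmrepp, status);
 *	} while (req->r_flags & R_RESTART);
 *	nfs_request_rele(req);
 */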

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}
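	/*
	 * Sizing example (added, illustrative, assuming NFSX_UNSIGNED == 4):
	 * an AUTH_UNIX cred with 3 groups carries 2 auxiliary gids beyond
	 * the primary, so auth_len = (2 << 2) + 5 * 4 = 28 bytes: the gid
	 * list plus the five fixed XDR words (stamp, empty machinename
	 * length, uid, gid, gid count).
	 */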

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}

/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, auth;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (req->r_flags & R_CWND) {
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (req->r_gss_ctx) {	// Using gss cred ?
		/*
		 * If the request had an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			if (mrep)
				mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;

			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;		// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique
			 * in not returning an NFS status.
			 */
1813 *status = NFS_OK;
1814 } else {
1815 nfsm_chain_get_32(error, &nmrep, *status);
1816 nfsmout_if(error);
1817 }
1818
1819 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
1820 /*
1821 * It's a JUKEBOX error - delay and try again
1822 */
1823 int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
1824
1825 mbuf_freem(mrep);
1826 req->r_nmrep.nmc_mhead = NULL;
1827 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
1828 /* we're not yet completely mounted and */
1829 /* we can't complete an RPC, so we fail */
1830 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
1831 nfs_softterm(req);
1832 error = req->r_error;
1833 goto nfsmout;
1834 }
1835 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
1836 if (req->r_delay > 30)
1837 req->r_delay = 30;
1838 if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
1839 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
1840 "resource temporarily unavailable (jukebox)");
1841 req->r_flags |= R_JBTPRINTFMSG;
1842 }
1843 delay = req->r_delay;
1844 if (req->r_callback.rcb_func) {
1845 struct timeval now;
1846 microuptime(&now);
1847 req->r_resendtime = now.tv_sec + delay;
1848 } else {
1849 do {
1850 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
1851 return (error);
1852 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
1853 } while (--delay > 0);
1854 }
1855 req->r_xid = 0; // get a new XID
1856 req->r_flags |= R_RESTART;
1857 req->r_start = 0;
1858 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
1859 return (0);
1860 }
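/*
 * Note on the schedule above: r_delay starts at NFS_TRYLATERDEL,
 * doubles on each successive NFSERR_TRYLATER reply, and is clamped
 * at 30 seconds (e.g. if NFS_TRYLATERDEL were 4, the waits would be
 * 4, 8, 16, 30, 30, ... seconds). Async requests just record a
 * resend time; sync requests sleep the delay off here a second at a
 * time on lbolt, checking for signals between ticks.
 */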
1861
1862 if (req->r_flags & R_JBTPRINTFMSG)
1863 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");
1864
1865 if (*status == NFS_OK) {
1866 /*
1867 * Successful NFS request
1868 */
1869 *nmrepp = nmrep;
1870 req->r_nmrep.nmc_mhead = NULL;
1871 break;
1872 }
1873 /* Got an NFS error of some kind */
1874
1875 /*
1876 * If the File Handle was stale, invalidate the
1877 * lookup cache, just in case.
1878 */
1879 if ((*status == ESTALE) && req->r_np)
1880 cache_purge(NFSTOV(req->r_np));
1881 if (nmp->nm_vers == NFS_VER2)
1882 mbuf_freem(mrep);
1883 else
1884 *nmrepp = nmrep;
1885 req->r_nmrep.nmc_mhead = NULL;
1886 error = 0;
1887 break;
1888 case RPC_PROGUNAVAIL:
1889 error = EPROGUNAVAIL;
1890 break;
1891 case RPC_PROGMISMATCH:
1892 error = ERPCMISMATCH;
1893 break;
1894 case RPC_PROCUNAVAIL:
1895 error = EPROCUNAVAIL;
1896 break;
1897 case RPC_GARBAGE:
1898 error = EBADRPC;
1899 break;
1900 case RPC_SYSTEM_ERR:
1901 default:
1902 error = EIO;
1903 break;
1904 }
1905 nfsmout:
1906 if (!error && (req->r_flags & R_JBTPRINTFMSG))
1907 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
1908 FSDBG(273, R_XID32(req->r_xid), nmp, req,
1909 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
1910 return (error);
1911 }
1912
1913
1914 /*
1915 * Perform an NFS request synchronously.
1916 */
1917
1918 int
1919 nfs_request(
1920 nfsnode_t np,
1921 mount_t mp, /* used only if !np */
1922 struct nfsm_chain *nmrest,
1923 int procnum,
1924 vfs_context_t ctx,
1925 struct nfsm_chain *nmrepp,
1926 u_int64_t *xidp,
1927 int *status)
1928 {
1929 return nfs_request2(np, mp, nmrest, procnum,
1930 vfs_context_thread(ctx), vfs_context_ucred(ctx),
1931 0, nmrepp, xidp, status);
1932 }
1933
1934 int
1935 nfs_request2(
1936 nfsnode_t np,
1937 mount_t mp, /* used only if !np */
1938 struct nfsm_chain *nmrest,
1939 int procnum,
1940 thread_t thd,
1941 kauth_cred_t cred,
1942 int flags,
1943 struct nfsm_chain *nmrepp,
1944 u_int64_t *xidp,
1945 int *status)
1946 {
1947 struct nfsreq rq, *req = &rq;
1948 int error;
1949
1950 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
1951 return (error);
1952 req->r_flags |= (flags & R_OPTMASK);
1953
1954 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
1955 do {
1956 req->r_error = 0;
1957 req->r_flags &= ~R_RESTART;
1958 if ((error = nfs_request_add_header(req)))
1959 break;
1960 if (xidp)
1961 *xidp = req->r_xid;
1962 if ((error = nfs_request_send(req, 1)))
1963 break;
1964 nfs_request_wait(req);
1965 if ((error = nfs_request_finish(req, nmrepp, status)))
1966 break;
1967 } while (req->r_flags & R_RESTART);
1968
1969 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
1970 nfs_request_rele(req);
1971 return (error);
1972 }
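#if 0
/*
 * Illustrative sketch only (hypothetical caller, not part of this
 * file): issuing a synchronous NULL RPC via nfs_request() above.
 * Error handling is abbreviated.
 */
static int
nfs_null_rpc_example(struct nfsmount *nmp, vfs_context_t ctx)
{
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	int error = 0, status = 0;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);
	nfsm_chain_build_alloc_init(error, &nmreq, 0);	/* no arguments */
	nfsm_chain_build_done(error, &nmreq);
	if (!error)
		error = nfs_request(NULL, nmp->nm_mountp, &nmreq,
			NFSPROC_NULL, ctx, &nmrep, &xid, &status);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error ? error : status);
}
#endif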
1973
1974 /*
1975 * Create and start an asynchronous NFS request.
1976 */
1977 int
1978 nfs_request_async(
1979 nfsnode_t np,
1980 mount_t mp, /* used only if !np */
1981 struct nfsm_chain *nmrest,
1982 int procnum,
1983 thread_t thd,
1984 kauth_cred_t cred,
1985 struct nfsreq_cbinfo *cb,
1986 struct nfsreq **reqp)
1987 {
1988 struct nfsreq *req;
1989 int error, sent;
1990
1991 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
1992 req = *reqp;
1993 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
1994 if (error)
1995 return (error);
1996 req->r_flags |= R_ASYNC;
1997 if (cb)
1998 req->r_callback = *cb;
1999 error = nfs_request_add_header(req);
2000 if (!error) {
2001 req->r_flags |= R_WAITSENT;
2002 if (req->r_callback.rcb_func)
2003 nfs_request_ref(req, 0);
2004 error = nfs_request_send(req, 1);
2005 lck_mtx_lock(&req->r_mtx);
2006 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
2007 /* make sure to wait until this async I/O request gets sent */
2008 int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
2009 struct timespec ts = { 2, 0 };
2010 while (!(req->r_flags & R_SENT)) {
2011 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2012 break;
2013 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
2014 slpflag = 0;
2015 }
2016 }
2017 sent = req->r_flags & R_SENT;
2018 lck_mtx_unlock(&req->r_mtx);
2019 if (error && req->r_callback.rcb_func && !sent)
2020 nfs_request_rele(req);
2021 }
2022 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
2023 if (error || req->r_callback.rcb_func)
2024 nfs_request_rele(req);
2025 return (error);
2026 }
2027
2028 /*
2029 * Wait for and finish an asynchronous NFS request.
2030 */
2031 int
2032 nfs_request_async_finish(
2033 struct nfsreq *req,
2034 struct nfsm_chain *nmrepp,
2035 u_int64_t *xidp,
2036 int *status)
2037 {
2038 int error, asyncio = req->r_callback.rcb_func ? 1 : 0;
2039
2040 lck_mtx_lock(&req->r_mtx);
2041 if (!asyncio)
2042 req->r_flags |= R_ASYNCWAIT;
2043 while (req->r_flags & R_RESENDQ) /* wait until the request is off the resend queue */
2044 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", NULL);
2045 lck_mtx_unlock(&req->r_mtx);
2046
2047 nfs_request_wait(req);
2048 error = nfs_request_finish(req, nmrepp, status);
2049
2050 while (!error && (req->r_flags & R_RESTART)) {
2051 if (asyncio && req->r_resendtime) { /* send later */
2052 lck_mtx_lock(&req->r_mtx);
2053 nfs_asyncio_resend(req);
2054 lck_mtx_unlock(&req->r_mtx);
2055 return (EINPROGRESS);
2056 }
2057 req->r_error = 0;
2058 req->r_flags &= ~R_RESTART;
2059 if ((error = nfs_request_add_header(req)))
2060 break;
2061 if ((error = nfs_request_send(req, !asyncio)))
2062 break;
2063 if (asyncio)
2064 return (EINPROGRESS);
2065 nfs_request_wait(req);
2066 if ((error = nfs_request_finish(req, nmrepp, status)))
2067 break;
2068 }
2069 if (xidp)
2070 *xidp = req->r_xid;
2071
2072 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
2073 nfs_request_rele(req);
2074 return (error);
2075 }
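/*
 * Note: the async pattern is nfs_request_async() to create and send
 * the request, then nfs_request_async_finish() to reap it -- either
 * from the rcb_func callback once the reply arrives, or directly
 * (with no callback) to wait in place. An EINPROGRESS return here
 * means the restarted request is back in flight (or queued for a
 * delayed resend) and will be finished from a later callback.
 */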
2076
2077 /*
2078 * Cancel a pending asynchronous NFS request.
2079 */
2080 void
2081 nfs_request_async_cancel(struct nfsreq *req)
2082 {
2083 nfs_reqdequeue(req);
2084 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
2085 nfs_request_rele(req);
2086 }
2087
2088 /*
2089 * Flag a request as being terminated.
2090 */
2091 static void
2092 nfs_softterm(struct nfsreq *req)
2093 {
2094 struct nfsmount *nmp = req->r_nmp;
2095 req->r_flags |= R_SOFTTERM;
2096 req->r_error = ETIMEDOUT;
2097 if (!(req->r_flags & R_CWND) || !nmp)
2098 return;
2099 /* update congestion window */
2100 req->r_flags &= ~R_CWND;
2101 lck_mtx_lock(&nmp->nm_lock);
2102 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2103 nmp->nm_sent -= NFS_CWNDSCALE;
2104 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2105 /* congestion window is open, poke the cwnd queue */
2106 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2107 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2108 req2->r_cchain.tqe_next = NFSREQNOLIST;
2109 wakeup(req2);
2110 }
2111 lck_mtx_unlock(&nmp->nm_lock);
2112 }
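/*
 * Congestion window accounting above: nm_sent and nm_cwnd are kept
 * in NFS_CWNDSCALE units. A request that was charged to the window
 * (R_CWND) must credit it back when it terminates, and if that
 * reopens the window the first request parked on nm_cwndq is woken
 * so it can proceed.
 */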
2113
2114 /*
2115 * Ensure req isn't in use by the timer, then dequeue it.
2116 */
2117 static void
2118 nfs_reqdequeue(struct nfsreq *req)
2119 {
2120 lck_mtx_lock(nfs_request_mutex);
2121 while (req->r_lflags & RL_BUSY) {
2122 req->r_lflags |= RL_WAITING;
2123 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
2124 }
2125 if (req->r_lflags & RL_QUEUED) {
2126 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
2127 req->r_lflags &= ~RL_QUEUED;
2128 }
2129 lck_mtx_unlock(nfs_request_mutex);
2130 }
2131
2132 /*
2133 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
2134 * free()'d out from under it.
2135 */
2136 static void
2137 nfs_reqbusy(struct nfsreq *req)
2138 {
2139 if (req->r_lflags & RL_BUSY)
2140 panic("req locked");
2141 req->r_lflags |= RL_BUSY;
2142 }
2143
2144 /*
2145 * Unbusy the nfsreq passed in; return the next nfsreq in the chain, busied.
2146 */
2147 static struct nfsreq *
2148 nfs_reqnext(struct nfsreq *req)
2149 {
2150 struct nfsreq * nextreq;
2151
2152 if (req == NULL)
2153 return (NULL);
2154 /*
2155 * We need to get and busy the next req before signalling the
2156 * current one, otherwise wakeup() may block us and we'll race to
2157 * grab the next req.
2158 */
2159 nextreq = TAILQ_NEXT(req, r_chain);
2160 if (nextreq != NULL)
2161 nfs_reqbusy(nextreq);
2162 /* unbusy and signal. */
2163 req->r_lflags &= ~RL_BUSY;
2164 if (req->r_lflags & RL_WAITING) {
2165 req->r_lflags &= ~RL_WAITING;
2166 wakeup(&req->r_lflags);
2167 }
2168 return (nextreq);
2169 }
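#if 0
/*
 * Illustrative sketch only: the queue-scan pattern these helpers
 * support (nfs_request_timer below walks nfs_reqq exactly this way).
 */
struct nfsreq *req;

lck_mtx_lock(nfs_request_mutex);
req = TAILQ_FIRST(&nfs_reqq);
if (req != NULL)
	nfs_reqbusy(req);
for (; req != NULL; req = nfs_reqnext(req)) {
	/* req is busied here and can't be freed out from under us */
}
lck_mtx_unlock(nfs_request_mutex);
#endif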
2170
2171 /*
2172 * NFS request queue timer routine
2173 *
2174 * Scan the NFS request queue for any requests that have timed out.
2175 *
2176 * Alert the system of unresponsive servers.
2177 * Mark expired requests on soft mounts as terminated.
2178 * For UDP, mark/signal requests for retransmission.
2179 */
2180 void
2181 nfs_request_timer(__unused void *param0, __unused void *param1)
2182 {
2183 struct nfsreq *req;
2184 struct nfsmount *nmp;
2185 int timeo, maxtime, finish_asyncio, error;
2186 struct timeval now;
2187 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
2188
2189 lck_mtx_lock(nfs_request_mutex);
2190 req = TAILQ_FIRST(&nfs_reqq);
2191 if (req == NULL) { /* no requests - turn timer off */
2192 nfs_request_timer_on = 0;
2193 lck_mtx_unlock(nfs_request_mutex);
2194 return;
2195 }
2196
2197 nfs_reqbusy(req);
2198 TAILQ_INIT(&nfs_mount_poke_queue);
2199
2200 microuptime(&now);
2201 for ( ; req != NULL ; req = nfs_reqnext(req)) {
2202 nmp = req->r_nmp;
2203 if (!nmp) /* unmounted */
2204 continue;
2205 if (req->r_error || req->r_nmrep.nmc_mhead)
2206 continue;
2207 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
2208 if (req->r_callback.rcb_func != NULL) {
2209 /* async I/O RPC needs to be finished */
2210 lck_mtx_lock(&req->r_mtx);
2211 req->r_error = error;
2212 finish_asyncio = !(req->r_flags & R_WAITSENT);
2213 wakeup(req);
2214 lck_mtx_unlock(&req->r_mtx);
2215 if (finish_asyncio)
2216 nfs_asyncio_finish(req);
2217 }
2218 continue;
2219 }
2220
2221 lck_mtx_lock(&req->r_mtx);
2222
2223 if (nmp->nm_tprintf_initial_delay &&
2224 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
2225 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
2226 req->r_lastmsg = now.tv_sec;
2227 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2228 "not responding");
2229 req->r_flags |= R_TPRINTFMSG;
2230 lck_mtx_lock(&nmp->nm_lock);
2231 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2232 lck_mtx_unlock(&nmp->nm_lock);
2233 /* we're not yet completely mounted and */
2234 /* we can't complete an RPC, so we fail */
2235 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2236 nfs_softterm(req);
2237 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2238 wakeup(req);
2239 lck_mtx_unlock(&req->r_mtx);
2240 if (finish_asyncio)
2241 nfs_asyncio_finish(req);
2242 continue;
2243 }
2244 lck_mtx_unlock(&nmp->nm_lock);
2245 }
2246
2247 /*
2248 * Put a reasonable limit on the maximum timeout,
2249 * and reduce that limit when soft mounts get timeouts or are in reconnect.
2250 */
2251 if (!(nmp->nm_flag & NFSMNT_SOFT))
2252 maxtime = NFS_MAXTIMEO;
2253 else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
2254 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
2255 else
2256 maxtime = NFS_MINTIMEO/4;
2257
2258 /*
2259 * Check for request timeout.
2260 */
2261 if (req->r_rtt >= 0) {
2262 req->r_rtt++;
2263 lck_mtx_lock(&nmp->nm_lock);
2264 if (req->r_flags & R_RESENDERR) {
2265 /* with resend errors, retry every few seconds */
2266 timeo = 4*hz;
2267 } else {
2268 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
2269 timeo = NFS_MINIDEMTIMEO; // gss context setup
2270 else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2271 timeo = nmp->nm_timeo;
2272 else
2273 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
2274
2275 /* ensure 62.5 ms floor */
2276 while (16 * timeo < hz)
2277 timeo *= 2;
2278 if (nmp->nm_timeouts > 0)
2279 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2280 }
2281 /* limit timeout to max */
2282 if (timeo > maxtime)
2283 timeo = maxtime;
2284 if (req->r_rtt <= timeo) {
2285 lck_mtx_unlock(&nmp->nm_lock);
2286 lck_mtx_unlock(&req->r_mtx);
2287 continue;
2288 }
2289 /* The request has timed out */
2290 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
2291 req->r_procnum, proct[req->r_procnum],
2292 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
2293 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
2294 if (nmp->nm_timeouts < 8)
2295 nmp->nm_timeouts++;
2296 /* if it's been a few seconds, try poking the socket */
2297 if ((nmp->nm_sotype == SOCK_STREAM) &&
2298 ((now.tv_sec - req->r_start) >= 3) &&
2299 !(nmp->nm_sockflags & NMSOCK_POKE)) {
2300 nmp->nm_sockflags |= NMSOCK_POKE;
2301 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
2302 }
2303 lck_mtx_unlock(&nmp->nm_lock);
2304 }
2305
2306 /* For soft mounts (& SETUPs), check for too many retransmits or too long a timeout. */
2307 if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
2308 ((req->r_rexmit >= req->r_retry) || /* too many */
2309 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
2310 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2311 lck_mtx_lock(&nmp->nm_lock);
2312 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
2313 lck_mtx_unlock(&nmp->nm_lock);
2314 /* make sure we note the unresponsive server */
2315 /* (maxtime may be less than tprintf delay) */
2316 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2317 "not responding");
2318 req->r_lastmsg = now.tv_sec;
2319 req->r_flags |= R_TPRINTFMSG;
2320 } else {
2321 lck_mtx_unlock(&nmp->nm_lock);
2322 }
2323 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
2324 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
2325 now.tv_sec - req->r_start));
2326 nfs_softterm(req);
2327 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2328 wakeup(req);
2329 lck_mtx_unlock(&req->r_mtx);
2330 if (finish_asyncio)
2331 nfs_asyncio_finish(req);
2332 continue;
2333 }
2334
2335 /* for TCP, only resend if explicitly requested */
2336 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
2337 if (++req->r_rexmit > NFS_MAXREXMIT)
2338 req->r_rexmit = NFS_MAXREXMIT;
2339 req->r_rtt = 0;
2340 lck_mtx_unlock(&req->r_mtx);
2341 continue;
2342 }
2343
2344 /*
2345 * The request needs to be (re)sent. Kick the requester to resend it.
2346 * (unless it's already marked as needing a resend)
2347 */
2348 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
2349 lck_mtx_unlock(&req->r_mtx);
2350 continue;
2351 }
2352 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
2353 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
2354 req->r_flags |= R_MUSTRESEND;
2355 req->r_rtt = -1;
2356 wakeup(req);
2357 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
2358 nfs_asyncio_resend(req);
2359 lck_mtx_unlock(&req->r_mtx);
2360 }
2361
2362 lck_mtx_unlock(nfs_request_mutex);
2363
2364 /* poke any sockets */
2365 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
2366 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
2367 nfs_sock_poke(nmp);
2368 lck_mtx_lock(&nmp->nm_lock);
2369 nmp->nm_sockflags &= ~NMSOCK_POKE;
2370 if (!(nmp->nm_state & NFSSTA_MOUNTED))
2371 wakeup(&nmp->nm_sockflags);
2372 lck_mtx_unlock(&nmp->nm_lock);
2373 }
2374
2375 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
2376 }
2377
2378 /*
2379 * check a thread's proc for the "noremotehang" flag.
2380 */
2381 int
2382 nfs_noremotehang(thread_t thd)
2383 {
2384 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
2385 return (p && proc_noremotehang(p));
2386 }
2387
2388 /*
2389 * Test for a termination condition pending on the process.
2390 * This is used to determine if we need to bail on a mount.
2391 * ETIMEDOUT is returned if there has been a soft timeout.
2392 * EINTR is returned if there is a signal pending that is not being ignored
2393 * and the mount is interruptible, or if the calling thread is in the process
2394 * of being cancelled (SIGKILL has also been posted).
2395 */
2396 int
2397 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
2398 {
2399 int error = 0;
2400
2401 if (nmp == NULL)
2402 return (ENXIO);
2403
2404 if (req && (req->r_flags & R_SOFTTERM))
2405 return (ETIMEDOUT); /* request has been terminated. */
2406
2407 /*
2408 * If we're in the process of a force unmount and there's
2409 * been a timeout, we're dead, so fail the I/O.
2410 */
2411 if (!nmplocked)
2412 lck_mtx_lock(&nmp->nm_lock);
2413 if ((nmp->nm_state & NFSSTA_FORCE) &&
2414 (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
2415 error = EIO;
2416 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
2417 /* Someone is unmounting us, go soft and mark it. */
2418 nmp->nm_flag |= NFSMNT_SOFT;
2419 nmp->nm_state |= NFSSTA_FORCE;
2420 }
2421
2422 /*
2423 * If the mount is hung and we've requested not to hang
2424 * on remote filesystems, then bail now.
2425 */
2426 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
2427 error = EIO;
2428
2429 if (!nmplocked)
2430 lck_mtx_unlock(&nmp->nm_lock);
2431 if (error)
2432 return (error);
2433
2434 /* may not have a thread for async I/O */
2435 if (thd == NULL)
2436 return (0);
2437
2438 /* If this thread belongs to the kernel task, then the abort check is not needed */
2439 if ((current_proc() != kernproc) && current_thread_aborted())
2440 return (EINTR);
2441
2442 /* mask off thread and process blocked signals. */
2443 if ((nmp->nm_flag & NFSMNT_INT) &&
2444 proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
2445 return (EINTR);
2446 return (0);
2447 }
2448
2449 /*
2450 * Lock a socket against others.
2451 * Necessary for STREAM sockets to ensure you get an entire RPC request/reply,
2452 * and also to avoid race conditions between processes with NFS requests
2453 * in progress when a reconnect is necessary.
2454 */
2455 int
2456 nfs_sndlock(struct nfsreq *req)
2457 {
2458 struct nfsmount *nmp = req->r_nmp;
2459 int *statep;
2460 int error = 0, slpflag = 0;
2461 struct timespec ts = { 0, 0 };
2462
2463 if (nmp == NULL)
2464 return (ENXIO);
2465
2466 lck_mtx_lock(&nmp->nm_lock);
2467 statep = &nmp->nm_state;
2468
2469 if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
2470 slpflag = PCATCH;
2471 while (*statep & NFSSTA_SNDLOCK) {
2472 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
2473 break;
2474 *statep |= NFSSTA_WANTSND;
2475 if (nfs_noremotehang(req->r_thread))
2476 ts.tv_sec = 1;
2477 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
2478 if (slpflag == PCATCH) {
2479 slpflag = 0;
2480 ts.tv_sec = 2;
2481 }
2482 }
2483 if (!error)
2484 *statep |= NFSSTA_SNDLOCK;
2485 lck_mtx_unlock(&nmp->nm_lock);
2486 return (error);
2487 }
2488
2489 /*
2490 * Unlock the stream socket for others.
2491 */
2492 void
2493 nfs_sndunlock(struct nfsreq *req)
2494 {
2495 struct nfsmount *nmp = req->r_nmp;
2496 int *statep, wake = 0;
2497
2498 if (nmp == NULL)
2499 return;
2500 lck_mtx_lock(&nmp->nm_lock);
2501 statep = &nmp->nm_state;
2502 if ((*statep & NFSSTA_SNDLOCK) == 0)
2503 panic("nfs sndunlock");
2504 *statep &= ~NFSSTA_SNDLOCK;
2505 if (*statep & NFSSTA_WANTSND) {
2506 *statep &= ~NFSSTA_WANTSND;
2507 wake = 1;
2508 }
2509 lck_mtx_unlock(&nmp->nm_lock);
2510 if (wake)
2511 wakeup(statep);
2512 }
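#if 0
/*
 * Illustrative fragment only: the send lock brackets any exclusive
 * use of the mount's socket (sending a request, reconnecting, ...).
 */
if ((error = nfs_sndlock(req)))
	return (error);
/* ... exclusive use of the socket here ... */
nfs_sndunlock(req);
#endif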
2513
2514 #endif /* NFSCLIENT */
2515
2516 #if NFSSERVER
2517
2518 /*
2519 * Generate the rpc reply header
2520 * The siz argument is used to decide whether allocating a cluster is worthwhile.
2521 */
2522 int
2523 nfsrv_rephead(
2524 struct nfsrv_descript *nd,
2525 __unused struct nfsrv_sock *slp,
2526 struct nfsm_chain *nmrepp,
2527 size_t siz)
2528 {
2529 mbuf_t mrep;
2530 u_long *tl;
2531 struct nfsm_chain nmrep;
2532 int err, error;
2533
2534 err = nd->nd_repstat;
2535 if (err && (nd->nd_vers == NFS_VER2))
2536 siz = 0;
2537
2538 /*
2539 * If this is a big reply, use a cluster; else
2540 * try and leave leading space for the lower level headers.
2541 */
2542 siz += RPC_REPLYSIZ;
2543 if (siz >= nfs_mbuf_minclsize) {
2544 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
2545 } else {
2546 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
2547 }
2548 if (error) {
2549 /* unable to allocate packet */
2550 /* XXX should we keep statistics for these errors? */
2551 return (error);
2552 }
2553 if (siz < nfs_mbuf_minclsize) {
2554 /* leave space for lower level headers */
2555 tl = mbuf_data(mrep);
2556 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
2557 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
2558 }
2559 nfsm_chain_init(&nmrep, mrep);
2560 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
2561 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2562 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
2563 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2564 if (err & NFSERR_AUTHERR) {
2565 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2566 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
2567 } else {
2568 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2569 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2570 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2571 }
2572 } else {
2573 /* reply status */
2574 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2575 if (nd->nd_gss_context != NULL) {
2576 /* RPCSEC_GSS verifier */
2577 error = nfs_gss_svc_verf_put(nd, &nmrep);
2578 if (error) {
2579 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
2580 goto done;
2581 }
2582 } else {
2583 /* RPCAUTH_NULL verifier */
2584 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2585 nfsm_chain_add_32(error, &nmrep, 0);
2586 }
2587 /* accepted status */
2588 switch (err) {
2589 case EPROGUNAVAIL:
2590 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2591 break;
2592 case EPROGMISMATCH:
2593 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2594 /* XXX hard coded versions? */
2595 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
2596 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
2597 break;
2598 case EPROCUNAVAIL:
2599 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2600 break;
2601 case EBADRPC:
2602 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2603 break;
2604 default:
2605 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2606 if (nd->nd_gss_context != NULL)
2607 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
2608 if (err != NFSERR_RETVOID)
2609 nfsm_chain_add_32(error, &nmrep,
2610 (err ? nfsrv_errmap(nd, err) : 0));
2611 break;
2612 }
2613 }
2614
2615 done:
2616 nfsm_chain_build_done(error, &nmrep);
2617 if (error) {
2618 /* error composing reply header */
2619 /* XXX should we keep statistics for these errors? */
2620 mbuf_freem(mrep);
2621 return (error);
2622 }
2623
2624 *nmrepp = nmrep;
2625 if ((err != 0) && (err != NFSERR_RETVOID))
2626 OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
2627 return (0);
2628 }
2629
2630 /*
2631 * The nfs server send routine.
2632 *
2633 * - return EINTR or ERESTART if interrupted by a signal
2634 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
2635 * - do any cleanup required by recoverable socket errors (???)
2636 */
2637 int
2638 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
2639 {
2640 int error;
2641 socket_t so = slp->ns_so;
2642 struct sockaddr *sendnam;
2643 struct msghdr msg;
2644
2645 bzero(&msg, sizeof(msg));
2646 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
2647 if ((sendnam = mbuf_data(nam))) {
2648 msg.msg_name = (caddr_t)sendnam;
2649 msg.msg_namelen = sendnam->sa_len;
2650 }
2651 }
2652 error = sock_sendmbuf(so, &msg, top, 0, NULL);
2653 if (!error)
2654 return (0);
2655 log(LOG_INFO, "nfsd send error %d\n", error);
2656
2657 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
2658 error = EPIPE; /* zap TCP sockets if they time out on send */
2659
2660 /* Handle any recoverable (soft) socket errors here. (???) */
2661 if (error != EINTR && error != ERESTART && error != EIO &&
2662 error != EWOULDBLOCK && error != EPIPE)
2663 error = 0;
2664
2665 return (error);
2666 }
2667
2668 /*
2669 * Socket upcall routine for the nfsd sockets.
2670 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
2671 * Essentially do as much as possible non-blocking; else punt, and it will
2672 * be called again with MBUF_WAITOK from an nfsd.
2673 */
2674 void
2675 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2676 {
2677 struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;
2678
2679 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
2680 return;
2681
2682 lck_rw_lock_exclusive(&slp->ns_rwlock);
2683 nfsrv_rcv_locked(so, slp, waitflag);
2684 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2685 }
2686 void
2687 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
2688 {
2689 mbuf_t m, mp, mhck, m2;
2690 int ns_flag=0, error;
2691 struct msghdr msg;
2692 size_t bytes_read;
2693
2694 if ((slp->ns_flag & SLP_VALID) == 0) {
2695 if (waitflag == MBUF_DONTWAIT)
2696 lck_rw_done(&slp->ns_rwlock);
2697 return;
2698 }
2699
2700 #ifdef notdef
2701 /*
2702 * Define this to test for nfsds handling this under heavy load.
2703 */
2704 if (waitflag == MBUF_DONTWAIT) {
2705 ns_flag = SLP_NEEDQ;
2706 goto dorecs;
2707 }
2708 #endif
2709 if (slp->ns_sotype == SOCK_STREAM) {
2710 /*
2711 * If there are already records on the queue, defer soreceive()
2712 * to an nfsd so that there is feedback to the TCP layer that
2713 * the nfs servers are heavily loaded.
2714 */
2715 if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
2716 ns_flag = SLP_NEEDQ;
2717 goto dorecs;
2718 }
2719
2720 /*
2721 * Do soreceive().
2722 */
2723 bytes_read = 1000000000;
2724 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
2725 if (error || mp == NULL) {
2726 if (error == EWOULDBLOCK)
2727 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
2728 else
2729 ns_flag = SLP_DISCONN;
2730 goto dorecs;
2731 }
2732 m = mp;
2733 if (slp->ns_rawend) {
2734 if ((error = mbuf_setnext(slp->ns_rawend, m)))
2735 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
2736 slp->ns_cc += bytes_read;
2737 } else {
2738 slp->ns_raw = m;
2739 slp->ns_cc = bytes_read;
2740 }
2741 while ((m2 = mbuf_next(m)))
2742 m = m2;
2743 slp->ns_rawend = m;
2744
2745 /*
2746 * Now try and parse record(s) out of the raw stream data.
2747 */
2748 error = nfsrv_getstream(slp, waitflag);
2749 if (error) {
2750 if (error == EPERM)
2751 ns_flag = SLP_DISCONN;
2752 else
2753 ns_flag = SLP_NEEDQ;
2754 }
2755 } else {
2756 struct sockaddr_storage nam;
2757
2758 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
2759 /* already have max # RPC records queued on this socket */
2760 ns_flag = SLP_NEEDQ;
2761 goto dorecs;
2762 }
2763
2764 bzero(&msg, sizeof(msg));
2765 msg.msg_name = (caddr_t)&nam;
2766 msg.msg_namelen = sizeof(nam);
2767
2768 do {
2769 bytes_read = 1000000000;
2770 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
2771 if (mp) {
2772 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
2773 mbuf_setlen(mhck, nam.ss_len);
2774 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
2775 m = mhck;
2776 if (mbuf_setnext(m, mp)) {
2777 /* trouble... just drop it */
2778 printf("nfsrv_rcv: mbuf_setnext failed\n");
2779 mbuf_free(mhck);
2780 m = mp;
2781 }
2782 } else {
2783 m = mp;
2784 }
2785 if (slp->ns_recend)
2786 mbuf_setnextpkt(slp->ns_recend, m);
2787 else {
2788 slp->ns_rec = m;
2789 slp->ns_flag |= SLP_DOREC;
2790 }
2791 slp->ns_recend = m;
2792 mbuf_setnextpkt(m, NULL);
2793 slp->ns_reccnt++;
2794 }
2795 } while (mp);
2796 }
2797
2798 /*
2799 * Now try and process the request records, non-blocking.
2800 */
2801 dorecs:
2802 if (ns_flag)
2803 slp->ns_flag |= ns_flag;
2804 if (waitflag == MBUF_DONTWAIT) {
2805 int wake = (slp->ns_flag & SLP_WORKTODO);
2806 lck_rw_done(&slp->ns_rwlock);
2807 if (wake && nfsd_thread_count) {
2808 lck_mtx_lock(nfsd_mutex);
2809 nfsrv_wakenfsd(slp);
2810 lck_mtx_unlock(nfsd_mutex);
2811 }
2812 }
2813 }
2814
2815 /*
2816 * Try and extract an RPC request from the mbuf data list received on a
2817 * stream socket. The "waitflag" argument indicates whether or not it
2818 * can sleep.
2819 */
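/*
 * Stream records use the RPC record marking standard (RFC 1831):
 * each fragment is preceded by a 4-byte big-endian marker whose low
 * 31 bits give the fragment length and whose high bit (0x80000000)
 * is set on the final fragment of a record. The loop below peels
 * off the marker, carves the fragment out of the raw mbuf chain,
 * and accumulates fragments in ns_frag until the last one completes
 * a record.
 */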
2820 static int
2821 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
2822 {
2823 mbuf_t m;
2824 char *cp1, *cp2, *mdata;
2825 int len, mlen, error;
2826 mbuf_t om, m2, recm;
2827 u_long recmark;
2828
2829 if (slp->ns_flag & SLP_GETSTREAM)
2830 panic("nfs getstream");
2831 slp->ns_flag |= SLP_GETSTREAM;
2832 for (;;) {
2833 if (slp->ns_reclen == 0) {
2834 if (slp->ns_cc < NFSX_UNSIGNED) {
2835 slp->ns_flag &= ~SLP_GETSTREAM;
2836 return (0);
2837 }
2838 m = slp->ns_raw;
2839 mdata = mbuf_data(m);
2840 mlen = mbuf_len(m);
2841 if (mlen >= NFSX_UNSIGNED) {
2842 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
2843 mdata += NFSX_UNSIGNED;
2844 mlen -= NFSX_UNSIGNED;
2845 mbuf_setdata(m, mdata, mlen);
2846 } else {
2847 cp1 = (caddr_t)&recmark;
2848 cp2 = mdata;
2849 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2850 while (mlen == 0) {
2851 m = mbuf_next(m);
2852 cp2 = mbuf_data(m);
2853 mlen = mbuf_len(m);
2854 }
2855 *cp1++ = *cp2++;
2856 mlen--;
2857 mbuf_setdata(m, cp2, mlen);
2858 }
2859 }
2860 slp->ns_cc -= NFSX_UNSIGNED;
2861 recmark = ntohl(recmark);
2862 slp->ns_reclen = recmark & ~0x80000000;
2863 if (recmark & 0x80000000)
2864 slp->ns_flag |= SLP_LASTFRAG;
2865 else
2866 slp->ns_flag &= ~SLP_LASTFRAG;
2867 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2868 slp->ns_flag &= ~SLP_GETSTREAM;
2869 return (EPERM);
2870 }
2871 }
2872
2873 /*
2874 * Now get the record part.
2875 *
2876 * Note that slp->ns_reclen may be 0. Linux sometimes
2877 * generates 0-length RPCs.
2878 */
2879 recm = NULL;
2880 if (slp->ns_cc == slp->ns_reclen) {
2881 recm = slp->ns_raw;
2882 slp->ns_raw = slp->ns_rawend = NULL;
2883 slp->ns_cc = slp->ns_reclen = 0;
2884 } else if (slp->ns_cc > slp->ns_reclen) {
2885 len = 0;
2886 m = slp->ns_raw;
2887 mlen = mbuf_len(m);
2888 mdata = mbuf_data(m);
2889 om = NULL;
2890 while (len < slp->ns_reclen) {
2891 if ((len + mlen) > slp->ns_reclen) {
2892 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
2893 slp->ns_flag &= ~SLP_GETSTREAM;
2894 return (EWOULDBLOCK);
2895 }
2896 if (om) {
2897 if (mbuf_setnext(om, m2)) {
2898 /* trouble... just drop it */
2899 printf("nfsrv_getstream: mbuf_setnext failed\n");
2900 mbuf_freem(m2);
2901 slp->ns_flag &= ~SLP_GETSTREAM;
2902 return (EWOULDBLOCK);
2903 }
2904 recm = slp->ns_raw;
2905 } else {
2906 recm = m2;
2907 }
2908 mdata += slp->ns_reclen - len;
2909 mlen -= slp->ns_reclen - len;
2910 mbuf_setdata(m, mdata, mlen);
2911 len = slp->ns_reclen;
2912 } else if ((len + mlen) == slp->ns_reclen) {
2913 om = m;
2914 len += mlen;
2915 m = mbuf_next(m);
2916 recm = slp->ns_raw;
2917 if (mbuf_setnext(om, NULL)) {
2918 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
2919 slp->ns_flag &= ~SLP_GETSTREAM;
2920 return (EWOULDBLOCK);
2921 }
2922 mlen = mbuf_len(m);
2923 mdata = mbuf_data(m);
2924 } else {
2925 om = m;
2926 len += mlen;
2927 m = mbuf_next(m);
2928 mlen = mbuf_len(m);
2929 mdata = mbuf_data(m);
2930 }
2931 }
2932 slp->ns_raw = m;
2933 slp->ns_cc -= len;
2934 slp->ns_reclen = 0;
2935 } else {
2936 slp->ns_flag &= ~SLP_GETSTREAM;
2937 return (0);
2938 }
2939
2940 /*
2941 * Accumulate the fragments into a record.
2942 */
2943 if (slp->ns_frag == NULL) {
2944 slp->ns_frag = recm;
2945 } else {
2946 m = slp->ns_frag;
2947 while ((m2 = mbuf_next(m)))
2948 m = m2;
2949 if ((error = mbuf_setnext(m, recm)))
2950 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
2951 }
2952 if (slp->ns_flag & SLP_LASTFRAG) {
2953 if (slp->ns_recend)
2954 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
2955 else {
2956 slp->ns_rec = slp->ns_frag;
2957 slp->ns_flag |= SLP_DOREC;
2958 }
2959 slp->ns_recend = slp->ns_frag;
2960 slp->ns_frag = NULL;
2961 }
2962 }
2963 }
2964
2965 /*
2966 * Parse an RPC header.
2967 */
2968 int
2969 nfsrv_dorec(
2970 struct nfsrv_sock *slp,
2971 struct nfsd *nfsd,
2972 struct nfsrv_descript **ndp)
2973 {
2974 mbuf_t m;
2975 mbuf_t nam;
2976 struct nfsrv_descript *nd;
2977 int error = 0;
2978
2979 *ndp = NULL;
2980 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
2981 return (ENOBUFS);
2982 MALLOC_ZONE(nd, struct nfsrv_descript *,
2983 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2984 if (!nd)
2985 return (ENOMEM);
2986 m = slp->ns_rec;
2987 slp->ns_rec = mbuf_nextpkt(m);
2988 if (slp->ns_rec)
2989 mbuf_setnextpkt(m, NULL);
2990 else {
2991 slp->ns_flag &= ~SLP_DOREC;
2992 slp->ns_recend = NULL;
2993 }
2994 slp->ns_reccnt--;
2995 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
2996 nam = m;
2997 m = mbuf_next(m);
2998 if ((error = mbuf_setnext(nam, NULL)))
2999 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
3000 } else
3001 nam = NULL;
3002 nd->nd_nam2 = nam;
3003 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
3004 if (!error)
3005 error = nfsrv_getreq(nd);
3006 if (error) {
3007 if (nam)
3008 mbuf_freem(nam);
3009 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
3010 return (error);
3011 }
3012 nd->nd_mrep = NULL;
3013 *ndp = nd;
3014 nfsd->nfsd_nd = nd;
3015 return (0);
3016 }
3017
3018 /*
3019 * Parse an RPC request
3020 * - verify it
3021 * - fill in the cred struct.
3022 */
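/*
 * For reference, the RPC call header dissected below (RFC 1831):
 *
 *	xid
 *	msg_type	(RPC_CALL == 0)
 *	rpcvers		(RPC_VER2 == 2)
 *	prog		(NFS_PROG)
 *	vers		(NFS version, 2 or 3)
 *	proc		(procedure number)
 *	cred		(flavor, length, body)
 *	verf		(flavor, length, body)
 */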
3023 static int
3024 nfsrv_getreq(struct nfsrv_descript *nd)
3025 {
3026 struct nfsm_chain *nmreq;
3027 int len, i;
3028 u_long nfsvers, auth_type;
3029 int error = 0;
3030 uid_t user_id;
3031 gid_t group_id;
3032 int ngroups;
3033 struct ucred temp_cred;
3034 uint32_t val;
3035
3036 nd->nd_cr = NULL;
3037 nd->nd_gss_context = NULL;
3038 nd->nd_gss_seqnum = 0;
3039 nd->nd_gss_mb = NULL;
3040
3041 user_id = group_id = -2;
3042 val = auth_type = len = 0;
3043
3044 nmreq = &nd->nd_nmreq;
3045 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
3046 nfsm_chain_get_32(error, nmreq, val); // RPC Call
3047 if (!error && (val != RPC_CALL))
3048 error = EBADRPC;
3049 nfsmout_if(error);
3050 nd->nd_repstat = 0;
3051 nfsm_chain_get_32(error, nmreq, val); // RPC Version
3052 nfsmout_if(error);
3053 if (val != RPC_VER2) {
3054 nd->nd_repstat = ERPCMISMATCH;
3055 nd->nd_procnum = NFSPROC_NOOP;
3056 return (0);
3057 }
3058 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
3059 nfsmout_if(error);
3060 if (val != NFS_PROG) {
3061 nd->nd_repstat = EPROGUNAVAIL;
3062 nd->nd_procnum = NFSPROC_NOOP;
3063 return (0);
3064 }
3065 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
3066 nfsmout_if(error);
3067 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
3068 nd->nd_repstat = EPROGMISMATCH;
3069 nd->nd_procnum = NFSPROC_NOOP;
3070 return (0);
3071 }
3072 nd->nd_vers = nfsvers;
3073 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
3074 nfsmout_if(error);
3075 if ((nd->nd_procnum >= NFS_NPROCS) ||
3076 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
3077 nd->nd_repstat = EPROCUNAVAIL;
3078 nd->nd_procnum = NFSPROC_NOOP;
3079 return (0);
3080 }
3081 if (nfsvers != NFS_VER3)
3082 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
3083 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
3084 nfsm_chain_get_32(error, nmreq, len); // Auth Length
3085 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
3086 error = EBADRPC;
3087 nfsmout_if(error);
3088
3089 /* Handle authentication */
3090 if (auth_type == RPCAUTH_UNIX) {
3091 if (nd->nd_procnum == NFSPROC_NULL)
3092 return (0);
3093 nd->nd_sec = RPCAUTH_UNIX;
3094 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
3095 nfsm_chain_get_32(error, nmreq, len); // hostname length
3096 if (len < 0 || len > NFS_MAXNAMLEN)
3097 error = EBADRPC;
3098 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
3099 nfsmout_if(error);
3100
3101 /* create a temporary credential using the bits from the wire */
3102 bzero(&temp_cred, sizeof(temp_cred));
3103 nfsm_chain_get_32(error, nmreq, user_id);
3104 nfsm_chain_get_32(error, nmreq, group_id);
3105 temp_cred.cr_groups[0] = group_id;
3106 nfsm_chain_get_32(error, nmreq, len); // extra GID count
3107 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
3108 error = EBADRPC;
3109 nfsmout_if(error);
3110 for (i = 1; i <= len; i++)
3111 if (i < NGROUPS)
3112 nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
3113 else
3114 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
3115 nfsmout_if(error);
3116 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
3117 if (ngroups > 1)
3118 nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
3119 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
3120 nfsm_chain_get_32(error, nmreq, len); // verifier length
3121 if (len < 0 || len > RPCAUTH_MAXSIZ)
3122 error = EBADRPC;
3123 if (len > 0)
3124 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
3125
3126 /* request creation of a real credential */
3127 temp_cred.cr_uid = user_id;
3128 temp_cred.cr_ngroups = ngroups;
3129 nd->nd_cr = kauth_cred_create(&temp_cred);
3130 if (nd->nd_cr == NULL) {
3131 nd->nd_repstat = ENOMEM;
3132 nd->nd_procnum = NFSPROC_NOOP;
3133 return (0);
3134 }
3135 } else if (auth_type == RPCSEC_GSS) {
3136 error = nfs_gss_svc_cred_get(nd, nmreq);
3137 if (error) {
3138 if (error == EINVAL)
3139 goto nfsmout; // drop the request
3140 nd->nd_repstat = error;
3141 nd->nd_procnum = NFSPROC_NOOP;
3142 return (0);
3143 }
3144 } else {
3145 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
3146 return (0);
3147 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
3148 nd->nd_procnum = NFSPROC_NOOP;
3149 return (0);
3150 }
3151 return (0);
3152 nfsmout:
3153 if (IS_VALID_CRED(nd->nd_cr))
3154 kauth_cred_unref(&nd->nd_cr);
3155 nfsm_chain_cleanup(nmreq);
3156 return (error);
3157 }
3158
3159 /*
3160 * Search for a sleeping nfsd and wake it up.
3161 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
3162 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
3163 * Note: Must be called with nfsd_mutex held.
3164 */
3165 void
3166 nfsrv_wakenfsd(struct nfsrv_sock *slp)
3167 {
3168 struct nfsd *nd;
3169
3170 if ((slp->ns_flag & SLP_VALID) == 0)
3171 return;
3172
3173 lck_rw_lock_exclusive(&slp->ns_rwlock);
3174 /* if there's work to do on this socket, make sure it's queued up */
3175 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
3176 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
3177 slp->ns_flag |= SLP_WAITQ;
3178 }
3179 lck_rw_done(&slp->ns_rwlock);
3180
3181 /* wake up a waiting nfsd, if possible */
3182 nd = TAILQ_FIRST(&nfsd_queue);
3183 if (!nd)
3184 return;
3185
3186 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
3187 nd->nfsd_flag &= ~NFSD_WAITING;
3188 wakeup(nd);
3189 }
3190
3191 #endif /* NFSSERVER */
3192
3193 static int
3194 nfs_msg(thread_t thd,
3195 const char *server,
3196 const char *msg,
3197 int error)
3198 {
3199 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
3200 tpr_t tpr;
3201
3202 if (p)
3203 tpr = tprintf_open(p);
3204 else
3205 tpr = NULL;
3206 if (error)
3207 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
3208 else
3209 tprintf(tpr, "nfs server %s: %s\n", server, msg);
3210 tprintf_close(tpr);
3211 return (0);
3212 }
3213
3214 void
3215 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
3216 {
3217 int ostate;
3218
3219 if (nmp == NULL)
3220 return;
3221
3222 lck_mtx_lock(&nmp->nm_lock);
3223 ostate = nmp->nm_state;
3224 if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
3225 nmp->nm_state |= NFSSTA_TIMEO;
3226 if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
3227 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3228 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
3229 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
3230 lck_mtx_unlock(&nmp->nm_lock);
3231
3232 if (!(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3233 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
3234
3235 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
3236 }
3237
3238 void
3239 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
3240 {
3241 int ostate, state;
3242
3243 if (nmp == NULL)
3244 return;
3245
3246 if (msg)
3247 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
3248
3249 lck_mtx_lock(&nmp->nm_lock);
3250 ostate = nmp->nm_state;
3251 if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
3252 nmp->nm_state &= ~NFSSTA_TIMEO;
3253 if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
3254 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3255 if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
3256 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
3257 state = nmp->nm_state;
3258 lck_mtx_unlock(&nmp->nm_lock);
3259
3260 if ((ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
3261 !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3262 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
3263 }
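/*
 * Note: nfs_down and nfs_up pair up around the NFSSTA_*TIMEO bits:
 * VQ_NOTRESP is signalled only on the first transition into any
 * timeout state, and signalled again (with a nonzero final argument)
 * only once the last timeout flag has cleared.
 */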
3264