bsd/nfs/nfs_socket.c (apple/xnu, xnu-1228.0.2)
/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

#if NFSCLIENT

static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void*, int);
static void	nfs_tcp_rcv(socket_t, void*, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif
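
/*
 * (To get the NFS_SOCK_DBG() tracing used throughout this file, define
 * NFS_SOCKET_DEBUGGING; the calls then expand to printf(), otherwise
 * they compile away entirely.)
 */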

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that the timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
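
/*
 * (The srtt/sdrtt values are kept scaled, as the update code in
 * nfs_request_match_reply() shows: the smoothed mean carries a factor
 * of 8 (initialized to NFS_TIMEO << 3, updated with a gain of 1/8) and
 * the smoothed deviation a factor of 4.  With A = srtt/8 and D = sdrtt/4,
 * the macro works out to roughly:
 *	t < 3:	(srtt/4 + sdrtt) / 2 = srtt/8 + sdrtt/2 = A + 2D
 *	t >= 3:	srtt/8 + sdrtt                          = A + 4D
 * e.g. srtt = 56 (A = 7 ticks) and sdrtt = 8 (D = 2 ticks) gives an rto
 * of 11 ticks for getattr/lookup and 16 for read/write.)
 */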

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
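/*
 * (Assuming the standard NFSv3 procedure numbering: GETATTR(1) and
 * ACCESS(4) share the getattr timer, LOOKUP(3) has its own, READLINK(5)
 * and READ(6) use the read timer, WRITE(7) the write timer, and
 * READDIR(16)/READDIRPLUS(17) reuse the read timer; everything else
 * falls back to the default nm_timeo.)
 */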

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
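/*
 * (nm_sent and nm_cwnd count in units of NFS_CWNDSCALE per outstanding
 * request, so NFS_MAXCWND allows at most 32 RPCs in flight on a UDP mount.
 * nfs_backoff[] holds the timeout multipliers the request timer applies
 * on successive retransmits.)
 */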

/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
			nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}
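	/*
	 * (Reserved ports are those below IPPORT_RESERVED (1024), which only
	 * privileged code may bind; many NFS servers treat a reserved source
	 * port as a weak sign that the request came from a kernel client,
	 * hence the NFSMNT_RESVPORT handling above.)
	 */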

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15) /* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			if (tocnt > 60) {
				/* abort if this is taking too long */
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}

	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	/* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		nfs_disconnect(nmp);
	return (error);
}

/* setup & confirm socket connection is functional */
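/*
 * (For non-v4 mounts this is a NULL RPC "ping": the request carries
 * R_SETUP, which nfs_send() accepts while the socket is still in the
 * NMSOCK_SETUP state, so the probe can go out before the socket is
 * marked NMSOCK_READY for regular traffic.)
 */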
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}

/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
static void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		} else if (force)
			break;
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}

/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}

/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

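	/*
	 * (RPC-over-TCP record marking, per RFC 1831: each record fragment
	 * is preceded by a 4-byte big-endian marker whose high bit flags the
	 * last fragment of a record and whose low 31 bits give the fragment
	 * length.  nm_markerleft/nm_fragleft/nm_reclen below track how much
	 * of the current marker, fragment, and record remain to be read.)
	 */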
	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}

/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
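			/*
			 * (With cwnd in NFS_CWNDSCALE units, the increment is
			 * roughly SCALE*SCALE/cwnd, i.e. about 1/cwnd of a
			 * request per reply; the "+ cwnd/2" term just rounds.
			 * A full window of replies thus grows the window by
			 * about one request per round trip.)
			 */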
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
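		/*
		 * (This is the standard Jacobson smoothing with the scaled
		 * state noted by the NFS_RTO() macro above: srtt += rtt -
		 * srtt/8 keeps srtt at 8x the smoothed mean, and sdrtt +=
		 * |err| - sdrtt/4 keeps sdrtt at 4x the smoothed deviation.)
		 */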
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
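/*
 * (nfs_request2() below is the canonical synchronous caller of this
 * sequence: it loops create -> add_header -> send -> wait -> finish,
 * restarting while R_RESTART is set, e.g. after an RPCSEC_GSS rebuild
 * or an NFSERR_TRYLATER delay.)
 */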

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}
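	/*
	 * (For AUTH_UNIX the length above is presumably the XDR size of the
	 * credential body: five 32-bit words of fixed fields plus four bytes
	 * per supplementary gid, with the gid count capped at the mount's
	 * nm_numgrps.)
	 */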

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
1639
1640 /*
1641 * Finish up an NFS request by dequeueing it and
1642 * doing the initial NFS request reply processing.
1643 */
1644 int
1645 nfs_request_finish(
1646 struct nfsreq *req,
1647 struct nfsm_chain *nmrepp,
1648 int *status)
1649 {
1650 struct nfsmount *nmp;
1651 mbuf_t mrep;
1652 int verf_type = 0;
1653 uint32_t verf_len = 0;
1654 uint32_t reply_status = 0;
1655 uint32_t rejected_status = 0;
1656 uint32_t auth_status = 0;
1657 uint32_t accepted_status = 0;
1658 struct nfsm_chain nmrep;
1659 int error, auth;
1660
1661 error = req->r_error;
1662
1663 if (nmrepp)
1664 nmrepp->nmc_mhead = NULL;
1665
1666 /* RPC done, unlink the request. */
1667 nfs_reqdequeue(req);
1668
1669 mrep = req->r_nmrep.nmc_mhead;
1670
1671 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
1672
1673 /*
1674 * Decrement the outstanding request count.
1675 */
1676 if (req->r_flags & R_CWND) {
1677 req->r_flags &= ~R_CWND;
1678 lck_mtx_lock(&nmp->nm_lock);
1679 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
1680 nmp->nm_sent -= NFS_CWNDSCALE;
1681 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
1682 /* congestion window is open, poke the cwnd queue */
1683 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
1684 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
1685 req2->r_cchain.tqe_next = NFSREQNOLIST;
1686 wakeup(req2);
1687 }
1688 lck_mtx_unlock(&nmp->nm_lock);
1689 }
1690
1691 if (req->r_gss_ctx) { // Using gss cred ?
1692 /*
1693 * If the request had an RPCSEC_GSS credential
1694 * then reset its sequence number bit in the
1695 * request window.
1696 */
1697 nfs_gss_clnt_rpcdone(req);
1698
1699 /*
1700 * If we need to re-send, go back and re-build the
1701 * request based on a new sequence number.
1702 * Note that we're using the original XID.
1703 */
1704 if (error == EAGAIN) {
1705 req->r_error = 0;
1706 if (mrep)
1707 mbuf_freem(mrep);
1708 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
1709 req->r_nmrep.nmc_mhead = NULL;
1710 req->r_flags |= R_RESTART;
1711 if (error == ENEEDAUTH) {
1712 req->r_xid = 0; // get a new XID
1713 error = 0;
1714 }
1715 goto nfsmout;
1716 }
1717 }
1718
1719 /*
1720 * If there was a successful reply, make sure to mark the mount as up.
1721 * If a tprintf message was given (or if this is a timed-out soft mount)
1722 * then post a tprintf message indicating the server is alive again.
1723 */
1724 if (!error) {
1725 if ((req->r_flags & R_TPRINTFMSG) ||
1726 (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
1727 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
1728 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
1729 else
1730 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
1731 }
1732 if (!error && !nmp)
1733 error = ENXIO;
1734 nfsmout_if(error);
1735
1736 /*
1737 * break down the RPC header and check if ok
1738 */
1739 nmrep = req->r_nmrep;
1740 nfsm_chain_get_32(error, &nmrep, reply_status);
1741 nfsmout_if(error);
1742 if (reply_status == RPC_MSGDENIED) {
1743 nfsm_chain_get_32(error, &nmrep, rejected_status);
1744 nfsmout_if(error);
1745 if (rejected_status == RPC_MISMATCH) {
1746 error = ENOTSUP;
1747 goto nfsmout;
1748 }
1749 nfsm_chain_get_32(error, &nmrep, auth_status);
1750 nfsmout_if(error);
1751 switch (auth_status) {
1752 case RPCSEC_GSS_CREDPROBLEM:
1753 case RPCSEC_GSS_CTXPROBLEM:
1754 /*
1755 * An RPCSEC_GSS cred or context problem.
1756 * We can't use it anymore.
1757 * Restore the args, renew the context
1758 * and set up for a resend.
1759 */
1760 error = nfs_gss_clnt_args_restore(req);
1761 if (error && error != ENEEDAUTH)
1762 break;
1763
1764 if (!error) {
1765 error = nfs_gss_clnt_ctx_renew(req);
1766 if (error)
1767 break;
1768 }
1769 mbuf_freem(mrep);
1770 req->r_nmrep.nmc_mhead = NULL;
1771 req->r_xid = 0; // get a new XID
1772 req->r_flags |= R_RESTART;
1773 goto nfsmout;
1774 default:
1775 error = EACCES;
1776 break;
1777 }
1778 goto nfsmout;
1779 }
1780
1781 /* Now check the verifier */
1782 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
1783 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
1784 nfsmout_if(error);
1785
1786 auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
1787 switch (auth) {
1788 case RPCAUTH_NULL:
1789 case RPCAUTH_UNIX:
1790 /* Any AUTH_UNIX verifier is ignored */
1791 if (verf_len > 0)
1792 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
1793 nfsm_chain_get_32(error, &nmrep, accepted_status);
1794 break;
1795 case RPCAUTH_KRB5:
1796 case RPCAUTH_KRB5I:
1797 case RPCAUTH_KRB5P:
1798 error = nfs_gss_clnt_verf_get(req, &nmrep,
1799 verf_type, verf_len, &accepted_status);
1800 break;
1801 }
1802 nfsmout_if(error);
1803
1804 switch (accepted_status) {
1805 case RPC_SUCCESS:
1806 if (req->r_procnum == NFSPROC_NULL) {
1807 /*
1808 * The NFS null procedure is unique,
1809 * in not returning an NFS status.
1810 */
1811 *status = NFS_OK;
1812 } else {
1813 nfsm_chain_get_32(error, &nmrep, *status);
1814 nfsmout_if(error);
1815 }
1816
1817 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
1818 /*
1819 * It's a JUKEBOX error - delay and try again
1820 */
1821 int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
1822
1823 mbuf_freem(mrep);
1824 req->r_nmrep.nmc_mhead = NULL;
1825 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
1826 /* we're not yet completely mounted and */
1827 /* we can't complete an RPC, so we fail */
1828 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
1829 nfs_softterm(req);
1830 error = req->r_error;
1831 goto nfsmout;
1832 }
1833 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
1834 if (req->r_delay > 30)
1835 req->r_delay = 30;
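/*
 * (So the retry schedule starts at NFS_TRYLATERDEL seconds and
 * doubles on each consecutive NFSERR_TRYLATER reply, capped at
 * 30 seconds, at which point the "jukebox" message may be posted.)
 */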
1836 if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
1837 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
1838 "resource temporarily unavailable (jukebox)");
1839 req->r_flags |= R_JBTPRINTFMSG;
1840 }
1841 delay = req->r_delay;
1842 if (req->r_callback.rcb_func) {
1843 struct timeval now;
1844 microuptime(&now);
1845 req->r_resendtime = now.tv_sec + delay;
1846 } else {
1847 do {
1848 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
1849 return (error);
1850 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
1851 } while (--delay > 0);
1852 }
1853 req->r_xid = 0; // get a new XID
1854 req->r_flags |= R_RESTART;
1855 req->r_start = 0;
1856 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
1857 return (0);
1858 }
1859
1860 if (req->r_flags & R_JBTPRINTFMSG)
1861 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");
1862
1863 if (*status == NFS_OK) {
1864 /*
1865 * Successful NFS request
1866 */
1867 *nmrepp = nmrep;
1868 req->r_nmrep.nmc_mhead = NULL;
1869 break;
1870 }
1871 /* Got an NFS error of some kind */
1872
1873 /*
1874 * If the File Handle was stale, invalidate the
1875 * lookup cache, just in case.
1876 */
1877 if ((*status == ESTALE) && req->r_np)
1878 cache_purge(NFSTOV(req->r_np));
1879 if (nmp->nm_vers == NFS_VER2)
1880 mbuf_freem(mrep);
1881 else
1882 *nmrepp = nmrep;
1883 req->r_nmrep.nmc_mhead = NULL;
1884 error = 0;
1885 break;
1886 case RPC_PROGUNAVAIL:
1887 error = EPROGUNAVAIL;
1888 break;
1889 case RPC_PROGMISMATCH:
1890 error = ERPCMISMATCH;
1891 break;
1892 case RPC_PROCUNAVAIL:
1893 error = EPROCUNAVAIL;
1894 break;
1895 case RPC_GARBAGE:
1896 error = EBADRPC;
1897 break;
1898 case RPC_SYSTEM_ERR:
1899 default:
1900 error = EIO;
1901 break;
1902 }
1903 nfsmout:
1904 if (!error && (req->r_flags & R_JBTPRINTFMSG))
1905 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
1906 FSDBG(273, R_XID32(req->r_xid), nmp, req,
1907 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
1908 return (error);
1909 }
1910
1911
1912 /*
1913 * Perform an NFS request synchronously.
1914 */
1915
1916 int
1917 nfs_request(
1918 nfsnode_t np,
1919 mount_t mp, /* used only if !np */
1920 struct nfsm_chain *nmrest,
1921 int procnum,
1922 vfs_context_t ctx,
1923 struct nfsm_chain *nmrepp,
1924 u_int64_t *xidp,
1925 int *status)
1926 {
1927 return nfs_request2(np, mp, nmrest, procnum,
1928 vfs_context_thread(ctx), vfs_context_ucred(ctx),
1929 0, nmrepp, xidp, status);
1930 }
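#if 0	/* illustrative sketch only; not compiled */
/*
 * Example: a minimal synchronous "ping" of the server using the NFS
 * null procedure.  This only sketches how callers are expected to
 * drive nfs_request(); "np" and "ctx" are assumed to come from the
 * caller, and the availability of the usual nfsm_chain_* build
 * helpers is assumed.  NFSPROC_NULL takes no arguments, so the
 * request chain is built empty.
 */
static int
nfs_null_ping_sketch(nfsnode_t np, vfs_context_t ctx)
{
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	int error = 0, status = 0;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);
	nfsm_chain_build_alloc_init(error, &nmreq, 0);
	nfsm_chain_build_done(error, &nmreq);
	if (!error)
		error = nfs_request(np, NULL, &nmreq, NFSPROC_NULL,
				ctx, &nmrep, &xid, &status);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error ? error : status);
}
#endif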
1931
1932 int
1933 nfs_request2(
1934 nfsnode_t np,
1935 mount_t mp, /* used only if !np */
1936 struct nfsm_chain *nmrest,
1937 int procnum,
1938 thread_t thd,
1939 kauth_cred_t cred,
1940 int flags,
1941 struct nfsm_chain *nmrepp,
1942 u_int64_t *xidp,
1943 int *status)
1944 {
1945 struct nfsreq rq, *req = &rq;
1946 int error;
1947
1948 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
1949 return (error);
1950 req->r_flags |= (flags & R_OPTMASK);
1951
1952 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
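/*
 * Note: nfs_request_finish() sets R_RESTART when the RPC must be
 * reissued from scratch, e.g. after an RPCSEC_GSS context renewal
 * or an NFSERR_TRYLATER (jukebox) delay, so we loop until the
 * request completes without requesting a restart.
 */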
1953 do {
1954 req->r_error = 0;
1955 req->r_flags &= ~R_RESTART;
1956 if ((error = nfs_request_add_header(req)))
1957 break;
1958 if (xidp)
1959 *xidp = req->r_xid;
1960 if ((error = nfs_request_send(req, 1)))
1961 break;
1962 nfs_request_wait(req);
1963 if ((error = nfs_request_finish(req, nmrepp, status)))
1964 break;
1965 } while (req->r_flags & R_RESTART);
1966
1967 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
1968 nfs_request_rele(req);
1969 return (error);
1970 }
1971
1972 /*
1973 * Create and start an asynchronous NFS request.
1974 */
1975 int
1976 nfs_request_async(
1977 nfsnode_t np,
1978 mount_t mp, /* used only if !np */
1979 struct nfsm_chain *nmrest,
1980 int procnum,
1981 thread_t thd,
1982 kauth_cred_t cred,
1983 struct nfsreq_cbinfo *cb,
1984 struct nfsreq **reqp)
1985 {
1986 struct nfsreq *req;
1987 int error, sent;
1988
1989 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
1990 req = *reqp;
1991 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
1992 if (error)
1993 return (error);
1994 req->r_flags |= R_ASYNC;
1995 if (cb)
1996 req->r_callback = *cb;
1997 error = nfs_request_add_header(req);
1998 if (!error) {
1999 req->r_flags |= R_WAITSENT;
2000 if (req->r_callback.rcb_func)
2001 nfs_request_ref(req, 0);
2002 error = nfs_request_send(req, 1);
2003 lck_mtx_lock(&req->r_mtx);
2004 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
2005 /* make sure to wait until this async I/O request gets sent */
2006 int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
2007 struct timespec ts = { 2, 0 };
2008 while (!(req->r_flags & R_SENT)) {
2009 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2010 break;
2011 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
2012 slpflag = 0;
2013 }
2014 }
2015 sent = req->r_flags & R_SENT;
2016 lck_mtx_unlock(&req->r_mtx);
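/*
 * If the send failed and the request never went out, a registered
 * callback will never fire; drop the extra reference that was
 * taken for it above.
 */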
2017 if (error && req->r_callback.rcb_func && !sent)
2018 nfs_request_rele(req);
2019 }
2020 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
2021 if (error || req->r_callback.rcb_func)
2022 nfs_request_rele(req);
2023 return (error);
2024 }
2025
2026 /*
2027 * Wait for and finish an asynchronous NFS request.
2028 */
2029 int
2030 nfs_request_async_finish(
2031 struct nfsreq *req,
2032 struct nfsm_chain *nmrepp,
2033 u_int64_t *xidp,
2034 int *status)
2035 {
2036 int error, asyncio = req->r_callback.rcb_func ? 1 : 0;
2037
2038 lck_mtx_lock(&req->r_mtx);
2039 if (!asyncio)
2040 req->r_flags |= R_ASYNCWAIT;
2041 while (req->r_flags & R_RESENDQ) /* wait until the request is off the resend queue */
2042 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", NULL);
2043 lck_mtx_unlock(&req->r_mtx);
2044
2045 nfs_request_wait(req);
2046 error = nfs_request_finish(req, nmrepp, status);
2047
2048 while (!error && (req->r_flags & R_RESTART)) {
2049 if (asyncio && req->r_resendtime) { /* send later */
2050 lck_mtx_lock(&req->r_mtx);
2051 nfs_asyncio_resend(req);
2052 lck_mtx_unlock(&req->r_mtx);
2053 return (EINPROGRESS);
2054 }
2055 req->r_error = 0;
2056 req->r_flags &= ~R_RESTART;
2057 if ((error = nfs_request_add_header(req)))
2058 break;
2059 if ((error = nfs_request_send(req, !asyncio)))
2060 break;
2061 if (asyncio)
2062 return (EINPROGRESS);
2063 nfs_request_wait(req);
2064 if ((error = nfs_request_finish(req, nmrepp, status)))
2065 break;
2066 }
2067 if (xidp)
2068 *xidp = req->r_xid;
2069
2070 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
2071 nfs_request_rele(req);
2072 return (error);
2073 }
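#if 0	/* illustrative sketch only; not compiled */
/*
 * Example of the asynchronous pattern, assuming a callback wired up
 * through nfsreq_cbinfo's rcb_func when the request was created with
 * nfs_request_async(): the callback completes the RPC by calling
 * nfs_request_async_finish().  An EINPROGRESS return means the
 * request was requeued (delayed resend), and the callback will be
 * invoked again when it's time to finish it.
 */
static void
nfs_async_done_sketch(struct nfsreq *req)
{
	struct nfsm_chain nmrep;
	u_int64_t xid;
	int error, status;

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS)
		return;	/* not done yet; we'll be called back again */
	/* ... consume nmrep and handle error/status here ... */
}
#endif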
2074
2075 /*
2076 * Cancel a pending asynchronous NFS request.
2077 */
2078 void
2079 nfs_request_async_cancel(struct nfsreq *req)
2080 {
2081 nfs_reqdequeue(req);
2082 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
2083 nfs_request_rele(req);
2084 }
2085
2086 /*
2087 * Flag a request as being terminated.
2088 */
2089 static void
2090 nfs_softterm(struct nfsreq *req)
2091 {
2092 struct nfsmount *nmp = req->r_nmp;
2093 req->r_flags |= R_SOFTTERM;
2094 req->r_error = ETIMEDOUT;
2095 if (!(req->r_flags & R_CWND) || !nmp)
2096 return;
2097 /* update congestion window */
2098 req->r_flags &= ~R_CWND;
2099 lck_mtx_lock(&nmp->nm_lock);
2100 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2101 nmp->nm_sent -= NFS_CWNDSCALE;
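/*
 * Each in-flight request holds NFS_CWNDSCALE units of the send
 * window; once nm_sent drops back below nm_cwnd, the first
 * waiter on the cwnd queue may transmit.
 */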
2102 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2103 /* congestion window is open, poke the cwnd queue */
2104 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2105 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2106 req2->r_cchain.tqe_next = NFSREQNOLIST;
2107 wakeup(req2);
2108 }
2109 lck_mtx_unlock(&nmp->nm_lock);
2110 }
2111
2112 /*
2113 * Ensure req isn't in use by the timer, then dequeue it.
2114 */
2115 static void
2116 nfs_reqdequeue(struct nfsreq *req)
2117 {
2118 lck_mtx_lock(nfs_request_mutex);
2119 while (req->r_lflags & RL_BUSY) {
2120 req->r_lflags |= RL_WAITING;
2121 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
2122 }
2123 if (req->r_lflags & RL_QUEUED) {
2124 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
2125 req->r_lflags &= ~RL_QUEUED;
2126 }
2127 lck_mtx_unlock(nfs_request_mutex);
2128 }
2129
2130 /*
2131 * Busy (lock) an nfsreq; used by the nfs timer to make sure it's not
2132 * free()'d out from under it.
2133 */
2134 static void
2135 nfs_reqbusy(struct nfsreq *req)
2136 {
2137 if (req->r_lflags & RL_BUSY)
2138 panic("req locked");
2139 req->r_lflags |= RL_BUSY;
2140 }
2141
2142 /*
2143 * Unbusy the given nfsreq and return the next nfsreq in the chain, busied.
2144 */
2145 static struct nfsreq *
2146 nfs_reqnext(struct nfsreq *req)
2147 {
2148 struct nfsreq * nextreq;
2149
2150 if (req == NULL)
2151 return (NULL);
2152 /*
2153 * We need to get and busy the next req before signalling the
2154 * current one, otherwise wakeup() may block us and we'll race to
2155 * grab the next req.
2156 */
2157 nextreq = TAILQ_NEXT(req, r_chain);
2158 if (nextreq != NULL)
2159 nfs_reqbusy(nextreq);
2160 /* unbusy and signal. */
2161 req->r_lflags &= ~RL_BUSY;
2162 if (req->r_lflags & RL_WAITING) {
2163 req->r_lflags &= ~RL_WAITING;
2164 wakeup(&req->r_lflags);
2165 }
2166 return (nextreq);
2167 }
2168
2169 /*
2170 * NFS request queue timer routine
2171 *
2172 * Scan the NFS request queue for any requests that have timed out.
2173 *
2174 * Alert the system of unresponsive servers.
2175 * Mark expired requests on soft mounts as terminated.
2176 * For UDP, mark/signal requests for retransmission.
2177 */
2178 void
2179 nfs_request_timer(__unused void *param0, __unused void *param1)
2180 {
2181 struct nfsreq *req;
2182 struct nfsmount *nmp;
2183 int timeo, maxtime, finish_asyncio, error;
2184 struct timeval now;
2185 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
2186
2187 lck_mtx_lock(nfs_request_mutex);
2188 req = TAILQ_FIRST(&nfs_reqq);
2189 if (req == NULL) { /* no requests - turn timer off */
2190 nfs_request_timer_on = 0;
2191 lck_mtx_unlock(nfs_request_mutex);
2192 return;
2193 }
2194
2195 nfs_reqbusy(req);
2196 TAILQ_INIT(&nfs_mount_poke_queue);
2197
2198 microuptime(&now);
2199 for ( ; req != NULL ; req = nfs_reqnext(req)) {
2200 nmp = req->r_nmp;
2201 if (!nmp) /* unmounted */
2202 continue;
2203 if (req->r_error || req->r_nmrep.nmc_mhead)
2204 continue;
2205 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
2206 if (req->r_callback.rcb_func != NULL) {
2207 /* async I/O RPC needs to be finished */
2208 lck_mtx_lock(&req->r_mtx);
2209 req->r_error = error;
2210 finish_asyncio = !(req->r_flags & R_WAITSENT);
2211 wakeup(req);
2212 lck_mtx_unlock(&req->r_mtx);
2213 if (finish_asyncio)
2214 nfs_asyncio_finish(req);
2215 }
2216 continue;
2217 }
2218
2219 lck_mtx_lock(&req->r_mtx);
2220
2221 if (nmp->nm_tprintf_initial_delay &&
2222 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
2223 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
2224 req->r_lastmsg = now.tv_sec;
2225 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2226 "not responding");
2227 req->r_flags |= R_TPRINTFMSG;
2228 lck_mtx_lock(&nmp->nm_lock);
2229 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2230 lck_mtx_unlock(&nmp->nm_lock);
2231 /* we're not yet completely mounted and */
2232 /* we can't complete an RPC, so we fail */
2233 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2234 nfs_softterm(req);
2235 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2236 wakeup(req);
2237 lck_mtx_unlock(&req->r_mtx);
2238 if (finish_asyncio)
2239 nfs_asyncio_finish(req);
2240 continue;
2241 }
2242 lck_mtx_unlock(&nmp->nm_lock);
2243 }
2244
2245 /*
2246 * Put a reasonable limit on the maximum timeout,
2247 * and reduce that limit when soft mounts get timeouts or are reconnecting.
2248 */
2249 if (!(nmp->nm_flag & NFSMNT_SOFT))
2250 maxtime = NFS_MAXTIMEO;
2251 else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
2252 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
2253 else
2254 maxtime = NFS_MINTIMEO/4;
2255
2256 /*
2257 * Check for request timeout.
2258 */
2259 if (req->r_rtt >= 0) {
2260 req->r_rtt++;
2261 lck_mtx_lock(&nmp->nm_lock);
2262 if (req->r_flags & R_RESENDERR) {
2263 /* with resend errors, retry every few seconds */
2264 timeo = 4*hz;
2265 } else {
2266 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
2267 timeo = NFS_MINIDEMTIMEO; // gss context setup
2268 else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2269 timeo = nmp->nm_timeo;
2270 else
2271 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
2272
2273 /* ensure 62.5 ms floor */
2274 while (16 * timeo < hz)
2275 timeo *= 2;
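/*
 * (e.g. with hz = 100 ticks/sec, a 1-tick timeo is doubled
 * up to 8 ticks, ~80ms, satisfying the 62.5ms floor)
 */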
2276 if (nmp->nm_timeouts > 0)
2277 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2278 }
2279 /* limit timeout to max */
2280 if (timeo > maxtime)
2281 timeo = maxtime;
2282 if (req->r_rtt <= timeo) {
2283 lck_mtx_unlock(&nmp->nm_lock);
2284 lck_mtx_unlock(&req->r_mtx);
2285 continue;
2286 }
2287 /* The request has timed out */
2288 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
2289 req->r_procnum, proct[req->r_procnum],
2290 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
2291 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
2292 if (nmp->nm_timeouts < 8)
2293 nmp->nm_timeouts++;
2294 /* if it's been a few seconds, try poking the socket */
2295 if ((nmp->nm_sotype == SOCK_STREAM) &&
2296 ((now.tv_sec - req->r_start) >= 3) &&
2297 !(nmp->nm_sockflags & NMSOCK_POKE)) {
2298 nmp->nm_sockflags |= NMSOCK_POKE;
2299 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
2300 }
2301 lck_mtx_unlock(&nmp->nm_lock);
2302 }
2303
2304 /* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
2305 if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
2306 ((req->r_rexmit >= req->r_retry) || /* too many */
2307 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
2308 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2309 lck_mtx_lock(&nmp->nm_lock);
2310 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
2311 lck_mtx_unlock(&nmp->nm_lock);
2312 /* make sure we note the unresponsive server */
2313 /* (maxtime may be less than tprintf delay) */
2314 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2315 "not responding");
2316 req->r_lastmsg = now.tv_sec;
2317 req->r_flags |= R_TPRINTFMSG;
2318 } else {
2319 lck_mtx_unlock(&nmp->nm_lock);
2320 }
2321 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
2322 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
2323 now.tv_sec - req->r_start));
2324 nfs_softterm(req);
2325 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2326 wakeup(req);
2327 lck_mtx_unlock(&req->r_mtx);
2328 if (finish_asyncio)
2329 nfs_asyncio_finish(req);
2330 continue;
2331 }
2332
2333 /* for TCP, only resend if explicitly requested */
2334 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
2335 if (++req->r_rexmit > NFS_MAXREXMIT)
2336 req->r_rexmit = NFS_MAXREXMIT;
2337 req->r_rtt = 0;
2338 lck_mtx_unlock(&req->r_mtx);
2339 continue;
2340 }
2341
2342 /*
2343 * The request needs to be (re)sent. Kick the requester to resend it.
2344 * (unless it's already marked as needing a resend)
2345 */
2346 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
2347 lck_mtx_unlock(&req->r_mtx);
2348 continue;
2349 }
2350 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
2351 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
2352 req->r_flags |= R_MUSTRESEND;
2353 req->r_rtt = -1;
2354 wakeup(req);
2355 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
2356 nfs_asyncio_resend(req);
2357 lck_mtx_unlock(&req->r_mtx);
2358 }
2359
2360 lck_mtx_unlock(nfs_request_mutex);
2361
2362 /* poke any sockets */
2363 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
2364 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
2365 nfs_sock_poke(nmp);
2366 lck_mtx_lock(&nmp->nm_lock);
2367 nmp->nm_sockflags &= ~NMSOCK_POKE;
2368 if (!(nmp->nm_state & NFSSTA_MOUNTED))
2369 wakeup(&nmp->nm_sockflags);
2370 lck_mtx_unlock(&nmp->nm_lock);
2371 }
2372
2373 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
2374 }
2375
2376 /*
2377 * Check a thread's proc for the "noremotehang" flag.
2378 */
2379 int
2380 nfs_noremotehang(thread_t thd)
2381 {
2382 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
2383 return (p && proc_noremotehang(p));
2384 }
2385
2386 /*
2387 * Test for a termination condition pending on the process.
2388 * This is used to determine if we need to bail on a mount.
2389 * ETIMEDOUT is returned if there has been a soft timeout.
2390 * EINTR is returned if there is a signal pending that is not being ignored
2391 * and the mount is interruptible, or if we are a thread in the process
2392 * of cancellation (i.e. SIGKILL has also been posted).
2393 */
2394 int
2395 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
2396 {
2397 int error = 0;
2398
2399 if (nmp == NULL)
2400 return (ENXIO);
2401
2402 if (req && (req->r_flags & R_SOFTTERM))
2403 return (ETIMEDOUT); /* request has been terminated. */
2404
2405 /*
2406 * If we're in the process of a force unmount and there's
2407 * been a timeout, we're dead and fail IO.
2408 */
2409 if (!nmplocked)
2410 lck_mtx_lock(&nmp->nm_lock);
2411 if ((nmp->nm_state & NFSSTA_FORCE) &&
2412 (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
2413 error = EIO;
2414 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
2415 /* Someone is unmounting us, go soft and mark it. */
2416 nmp->nm_flag |= NFSMNT_SOFT;
2417 nmp->nm_state |= NFSSTA_FORCE;
2418 }
2419
2420 /*
2421 * If the mount is hung and we've requested not to hang
2422 * on remote filesystems, then bail now.
2423 */
2424 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
2425 error = EIO;
2426
2427 if (!nmplocked)
2428 lck_mtx_unlock(&nmp->nm_lock);
2429 if (error)
2430 return (error);
2431
2432 /* may not have a thread for async I/O */
2433 if (thd == NULL)
2434 return (0);
2435
2436 /* If this thread belongs to the kernel task, the abort check is not needed. */
2437 if ((current_proc() != kernproc) && current_thread_aborted())
2438 return (EINTR);
2439
2440 /* Check for a pending signal, masking off signals blocked by the thread or process. */
2441 if ((nmp->nm_flag & NFSMNT_INT) &&
2442 proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
2443 return (EINTR);
2444 return (0);
2445 }
2446
2447 /*
2448 * Lock a socket against others.
2449 * Necessary for STREAM sockets to ensure an entire RPC request/reply goes
2450 * out intact, and also to avoid race conditions between processes with nfs
2451 * requests in progress when a reconnect is necessary.
2452 */
2453 int
2454 nfs_sndlock(struct nfsreq *req)
2455 {
2456 struct nfsmount *nmp = req->r_nmp;
2457 int *statep;
2458 int error = 0, slpflag = 0;
2459 struct timespec ts = { 0, 0 };
2460
2461 if (nmp == NULL)
2462 return (ENXIO);
2463
2464 lck_mtx_lock(&nmp->nm_lock);
2465 statep = &nmp->nm_state;
2466
2467 if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
2468 slpflag = PCATCH;
2469 while (*statep & NFSSTA_SNDLOCK) {
2470 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
2471 break;
2472 *statep |= NFSSTA_WANTSND;
2473 if (nfs_noremotehang(req->r_thread))
2474 ts.tv_sec = 1;
2475 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
2476 if (slpflag == PCATCH) {
2477 slpflag = 0;
2478 ts.tv_sec = 2;
2479 }
2480 }
2481 if (!error)
2482 *statep |= NFSSTA_SNDLOCK;
2483 lck_mtx_unlock(&nmp->nm_lock);
2484 return (error);
2485 }
2486
2487 /*
2488 * Unlock the stream socket for others.
2489 */
2490 void
2491 nfs_sndunlock(struct nfsreq *req)
2492 {
2493 struct nfsmount *nmp = req->r_nmp;
2494 int *statep, wake = 0;
2495
2496 if (nmp == NULL)
2497 return;
2498 lck_mtx_lock(&nmp->nm_lock);
2499 statep = &nmp->nm_state;
2500 if ((*statep & NFSSTA_SNDLOCK) == 0)
2501 panic("nfs sndunlock");
2502 *statep &= ~NFSSTA_SNDLOCK;
2503 if (*statep & NFSSTA_WANTSND) {
2504 *statep &= ~NFSSTA_WANTSND;
2505 wake = 1;
2506 }
2507 lck_mtx_unlock(&nmp->nm_lock);
2508 if (wake)
2509 wakeup(statep);
2510 }
2511
2512 #endif /* NFSCLIENT */
2513
2514 #if NFSSERVER
2515
2516 /*
2517 * Generate the RPC reply header.
2518 * The siz argument is used to decide whether adding a cluster is worthwhile.
2519 */
2520 int
2521 nfsrv_rephead(
2522 struct nfsrv_descript *nd,
2523 __unused struct nfsrv_sock *slp,
2524 struct nfsm_chain *nmrepp,
2525 size_t siz)
2526 {
2527 mbuf_t mrep;
2528 u_long *tl;
2529 struct nfsm_chain nmrep;
2530 int err, error;
2531
2532 err = nd->nd_repstat;
2533 if (err && (nd->nd_vers == NFS_VER2))
2534 siz = 0;
2535
2536 /*
2537 * If this is a big reply, use a cluster; otherwise
2538 * try to leave leading space for the lower-level headers.
2539 */
2540 siz += RPC_REPLYSIZ;
2541 if (siz >= nfs_mbuf_minclsize) {
2542 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
2543 } else {
2544 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
2545 }
2546 if (error) {
2547 /* unable to allocate packet */
2548 /* XXX should we keep statistics for these errors? */
2549 return (error);
2550 }
2551 if (siz < nfs_mbuf_minclsize) {
2552 /* leave space for lower level headers */
2553 tl = mbuf_data(mrep);
2554 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
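/*
 * (80 bytes of leading space; presumably meant to cover the
 * usual link-level + IP + transport headers, hence the
 * max_hdr question above)
 */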
2555 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
2556 }
2557 nfsm_chain_init(&nmrep, mrep);
2558 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
2559 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2560 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
2561 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2562 if (err & NFSERR_AUTHERR) {
2563 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2564 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
2565 } else {
2566 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2567 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2568 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2569 }
2570 } else {
2571 /* reply status */
2572 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2573 if (nd->nd_gss_context != NULL) {
2574 /* RPCSEC_GSS verifier */
2575 error = nfs_gss_svc_verf_put(nd, &nmrep);
2576 if (error) {
2577 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
2578 goto done;
2579 }
2580 } else {
2581 /* RPCAUTH_NULL verifier */
2582 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2583 nfsm_chain_add_32(error, &nmrep, 0);
2584 }
2585 /* accepted status */
2586 switch (err) {
2587 case EPROGUNAVAIL:
2588 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2589 break;
2590 case EPROGMISMATCH:
2591 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2592 /* XXX hard coded versions? */
2593 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
2594 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
2595 break;
2596 case EPROCUNAVAIL:
2597 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2598 break;
2599 case EBADRPC:
2600 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2601 break;
2602 default:
2603 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2604 if (nd->nd_gss_context != NULL)
2605 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
2606 if (err != NFSERR_RETVOID)
2607 nfsm_chain_add_32(error, &nmrep,
2608 (err ? nfsrv_errmap(nd, err) : 0));
2609 break;
2610 }
2611 }
2612
2613 done:
2614 nfsm_chain_build_done(error, &nmrep);
2615 if (error) {
2616 /* error composing reply header */
2617 /* XXX should we keep statistics for these errors? */
2618 mbuf_freem(mrep);
2619 return (error);
2620 }
2621
2622 *nmrepp = nmrep;
2623 if ((err != 0) && (err != NFSERR_RETVOID))
2624 OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
2625 return (0);
2626 }
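/*
 * For reference, the accepted reply built above is a sequence of
 * 32-bit XDR words on the wire:
 *
 *	xid, RPC_REPLY, RPC_MSGACCEPTED,
 *	verifier flavor, verifier length (+ opaque verifier data),
 *	accepted status (RPC_SUCCESS, RPC_PROGUNAVAIL, ...),
 *	then, for RPC_SUCCESS, the NFS status and procedure results.
 */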
2627
2628 /*
2629 * The nfs server send routine.
2630 *
2631 * - return EINTR or ERESTART if interrupted by a signal
2632 * - return EPIPE if a connection is lost for connection-based sockets (TCP...)
2633 * - do any cleanup required by recoverable socket errors (???)
2634 */
2635 int
2636 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
2637 {
2638 int error;
2639 socket_t so = slp->ns_so;
2640 struct sockaddr *sendnam;
2641 struct msghdr msg;
2642
2643 bzero(&msg, sizeof(msg));
2644 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
2645 if ((sendnam = mbuf_data(nam))) {
2646 msg.msg_name = (caddr_t)sendnam;
2647 msg.msg_namelen = sendnam->sa_len;
2648 }
2649 }
2650 error = sock_sendmbuf(so, &msg, top, 0, NULL);
2651 if (!error)
2652 return (0);
2653 log(LOG_INFO, "nfsd send error %d\n", error);
2654
2655 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
2656 error = EPIPE; /* zap TCP sockets if they time out on send */
2657
2658 /* Handle any recoverable (soft) socket errors here. (???) */
2659 if (error != EINTR && error != ERESTART && error != EIO &&
2660 error != EWOULDBLOCK && error != EPIPE)
2661 error = 0;
2662
2663 return (error);
2664 }
2665
2666 /*
2667 * Socket upcall routine for the nfsd sockets.
2668 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
2669 * Essentially do as much as possible without blocking; otherwise punt,
2670 * and it will be called again with MBUF_WAITOK from an nfsd.
2671 */
2672 void
2673 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2674 {
2675 struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;
2676
2677 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
2678 return;
2679
2680 lck_rw_lock_exclusive(&slp->ns_rwlock);
2681 nfsrv_rcv_locked(so, slp, waitflag);
2682 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2683 }
2684 void
2685 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
2686 {
2687 mbuf_t m, mp, mhck, m2;
2688 int ns_flag=0, error;
2689 struct msghdr msg;
2690 size_t bytes_read;
2691
2692 if ((slp->ns_flag & SLP_VALID) == 0) {
2693 if (waitflag == MBUF_DONTWAIT)
2694 lck_rw_done(&slp->ns_rwlock);
2695 return;
2696 }
2697
2698 #ifdef notdef
2699 /*
2700 * Define this to test for nfsds handling this under heavy load.
2701 */
2702 if (waitflag == MBUF_DONTWAIT) {
2703 ns_flag = SLP_NEEDQ;
2704 goto dorecs;
2705 }
2706 #endif
2707 if (slp->ns_sotype == SOCK_STREAM) {
2708 /*
2709 * If there are already records on the queue, defer soreceive()
2710 * to an nfsd so that there is feedback to the TCP layer that
2711 * the nfs servers are heavily loaded.
2712 */
2713 if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
2714 ns_flag = SLP_NEEDQ;
2715 goto dorecs;
2716 }
2717
2718 /*
2719 * Do soreceive().
2720 */
2721 bytes_read = 1000000000;
2722 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
2723 if (error || mp == NULL) {
2724 if (error == EWOULDBLOCK)
2725 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
2726 else
2727 ns_flag = SLP_DISCONN;
2728 goto dorecs;
2729 }
2730 m = mp;
2731 if (slp->ns_rawend) {
2732 if ((error = mbuf_setnext(slp->ns_rawend, m)))
2733 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
2734 slp->ns_cc += bytes_read;
2735 } else {
2736 slp->ns_raw = m;
2737 slp->ns_cc = bytes_read;
2738 }
2739 while ((m2 = mbuf_next(m)))
2740 m = m2;
2741 slp->ns_rawend = m;
2742
2743 /*
2744 * Now try to parse record(s) out of the raw stream data.
2745 */
2746 error = nfsrv_getstream(slp, waitflag);
2747 if (error) {
2748 if (error == EPERM)
2749 ns_flag = SLP_DISCONN;
2750 else
2751 ns_flag = SLP_NEEDQ;
2752 }
2753 } else {
2754 struct sockaddr_storage nam;
2755
2756 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
2757 /* already have max # RPC records queued on this socket */
2758 ns_flag = SLP_NEEDQ;
2759 goto dorecs;
2760 }
2761
2762 bzero(&msg, sizeof(msg));
2763 msg.msg_name = (caddr_t)&nam;
2764 msg.msg_namelen = sizeof(nam);
2765
2766 do {
2767 bytes_read = 1000000000;
2768 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
2769 if (mp) {
2770 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
2771 mbuf_setlen(mhck, nam.ss_len);
2772 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
2773 m = mhck;
2774 if (mbuf_setnext(m, mp)) {
2775 /* trouble... just drop it */
2776 printf("nfsrv_rcv: mbuf_setnext failed\n");
2777 mbuf_free(mhck);
2778 m = mp;
2779 }
2780 } else {
2781 m = mp;
2782 }
2783 if (slp->ns_recend)
2784 mbuf_setnextpkt(slp->ns_recend, m);
2785 else {
2786 slp->ns_rec = m;
2787 slp->ns_flag |= SLP_DOREC;
2788 }
2789 slp->ns_recend = m;
2790 mbuf_setnextpkt(m, NULL);
2791 slp->ns_reccnt++;
2792 }
2793 } while (mp);
2794 }
2795
2796 /*
2797 * Now try to process the request records without blocking.
2798 */
2799 dorecs:
2800 if (ns_flag)
2801 slp->ns_flag |= ns_flag;
2802 if (waitflag == MBUF_DONTWAIT) {
2803 int wake = (slp->ns_flag & SLP_WORKTODO);
2804 lck_rw_done(&slp->ns_rwlock);
2805 if (wake && nfsd_thread_count) {
2806 lck_mtx_lock(nfsd_mutex);
2807 nfsrv_wakenfsd(slp);
2808 lck_mtx_unlock(nfsd_mutex);
2809 }
2810 }
2811 }
2812
2813 /*
2814 * Try to extract an RPC request from the mbuf data list received on a
2815 * stream socket. The "waitflag" argument indicates whether or not it
2816 * can sleep.
2817 */
2818 static int
2819 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
2820 {
2821 mbuf_t m;
2822 char *cp1, *cp2, *mdata;
2823 int len, mlen, error;
2824 mbuf_t om, m2, recm;
2825 u_long recmark;
2826
2827 if (slp->ns_flag & SLP_GETSTREAM)
2828 panic("nfs getstream");
2829 slp->ns_flag |= SLP_GETSTREAM;
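/*
 * State: ns_raw/ns_rawend/ns_cc accumulate raw stream bytes,
 * ns_reclen holds the current fragment's length, and ns_frag
 * holds the record being assembled; complete records are
 * appended to ns_rec/ns_recend and SLP_DOREC is set.
 */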
2830 for (;;) {
2831 if (slp->ns_reclen == 0) {
2832 if (slp->ns_cc < NFSX_UNSIGNED) {
2833 slp->ns_flag &= ~SLP_GETSTREAM;
2834 return (0);
2835 }
2836 m = slp->ns_raw;
2837 mdata = mbuf_data(m);
2838 mlen = mbuf_len(m);
2839 if (mlen >= NFSX_UNSIGNED) {
2840 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
2841 mdata += NFSX_UNSIGNED;
2842 mlen -= NFSX_UNSIGNED;
2843 mbuf_setdata(m, mdata, mlen);
2844 } else {
2845 cp1 = (caddr_t)&recmark;
2846 cp2 = mdata;
2847 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2848 while (mlen == 0) {
2849 m = mbuf_next(m);
2850 cp2 = mbuf_data(m);
2851 mlen = mbuf_len(m);
2852 }
2853 *cp1++ = *cp2++;
2854 mlen--;
2855 mbuf_setdata(m, cp2, mlen);
2856 }
2857 }
2858 slp->ns_cc -= NFSX_UNSIGNED;
2859 recmark = ntohl(recmark);
2860 slp->ns_reclen = recmark & ~0x80000000;
2861 if (recmark & 0x80000000)
2862 slp->ns_flag |= SLP_LASTFRAG;
2863 else
2864 slp->ns_flag &= ~SLP_LASTFRAG;
2865 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2866 slp->ns_flag &= ~SLP_GETSTREAM;
2867 return (EPERM);
2868 }
2869 }
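/*
 * Record mark example: 0x80000190 means "last fragment,
 * 0x190 (400) bytes follow"; a record split across two
 * fragments might arrive as 0x00000064 ... 0x8000012c
 * (100 bytes, then a final 300 bytes).
 */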
2870
2871 /*
2872 * Now get the record part.
2873 *
2874 * Note that slp->ns_reclen may be 0. Linux sometimes
2875 * generates 0-length RPC records.
2876 */
2877 recm = NULL;
2878 if (slp->ns_cc == slp->ns_reclen) {
2879 recm = slp->ns_raw;
2880 slp->ns_raw = slp->ns_rawend = NULL;
2881 slp->ns_cc = slp->ns_reclen = 0;
2882 } else if (slp->ns_cc > slp->ns_reclen) {
2883 len = 0;
2884 m = slp->ns_raw;
2885 mlen = mbuf_len(m);
2886 mdata = mbuf_data(m);
2887 om = NULL;
2888 while (len < slp->ns_reclen) {
2889 if ((len + mlen) > slp->ns_reclen) {
2890 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
2891 slp->ns_flag &= ~SLP_GETSTREAM;
2892 return (EWOULDBLOCK);
2893 }
2894 if (om) {
2895 if (mbuf_setnext(om, m2)) {
2896 /* trouble... just drop it */
2897 printf("nfsrv_getstream: mbuf_setnext failed\n");
2898 mbuf_freem(m2);
2899 slp->ns_flag &= ~SLP_GETSTREAM;
2900 return (EWOULDBLOCK);
2901 }
2902 recm = slp->ns_raw;
2903 } else {
2904 recm = m2;
2905 }
2906 mdata += slp->ns_reclen - len;
2907 mlen -= slp->ns_reclen - len;
2908 mbuf_setdata(m, mdata, mlen);
2909 len = slp->ns_reclen;
2910 } else if ((len + mlen) == slp->ns_reclen) {
2911 om = m;
2912 len += mlen;
2913 m = mbuf_next(m);
2914 recm = slp->ns_raw;
2915 if (mbuf_setnext(om, NULL)) {
2916 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
2917 slp->ns_flag &= ~SLP_GETSTREAM;
2918 return (EWOULDBLOCK);
2919 }
2920 mlen = mbuf_len(m);
2921 mdata = mbuf_data(m);
2922 } else {
2923 om = m;
2924 len += mlen;
2925 m = mbuf_next(m);
2926 mlen = mbuf_len(m);
2927 mdata = mbuf_data(m);
2928 }
2929 }
2930 slp->ns_raw = m;
2931 slp->ns_cc -= len;
2932 slp->ns_reclen = 0;
2933 } else {
2934 slp->ns_flag &= ~SLP_GETSTREAM;
2935 return (0);
2936 }
2937
2938 /*
2939 * Accumulate the fragments into a record.
2940 */
2941 if (slp->ns_frag == NULL) {
2942 slp->ns_frag = recm;
2943 } else {
2944 m = slp->ns_frag;
2945 while ((m2 = mbuf_next(m)))
2946 m = m2;
2947 if ((error = mbuf_setnext(m, recm)))
2948 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
2949 }
2950 if (slp->ns_flag & SLP_LASTFRAG) {
2951 if (slp->ns_recend)
2952 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
2953 else {
2954 slp->ns_rec = slp->ns_frag;
2955 slp->ns_flag |= SLP_DOREC;
2956 }
2957 slp->ns_recend = slp->ns_frag;
2958 slp->ns_frag = NULL;
2959 }
2960 }
2961 }
2962
2963 /*
2964 * Parse an RPC header.
2965 */
2966 int
2967 nfsrv_dorec(
2968 struct nfsrv_sock *slp,
2969 struct nfsd *nfsd,
2970 struct nfsrv_descript **ndp)
2971 {
2972 mbuf_t m;
2973 mbuf_t nam;
2974 struct nfsrv_descript *nd;
2975 int error = 0;
2976
2977 *ndp = NULL;
2978 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
2979 return (ENOBUFS);
2980 MALLOC_ZONE(nd, struct nfsrv_descript *,
2981 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2982 if (!nd)
2983 return (ENOMEM);
2984 m = slp->ns_rec;
2985 slp->ns_rec = mbuf_nextpkt(m);
2986 if (slp->ns_rec)
2987 mbuf_setnextpkt(m, NULL);
2988 else {
2989 slp->ns_flag &= ~SLP_DOREC;
2990 slp->ns_recend = NULL;
2991 }
2992 slp->ns_reccnt--;
2993 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
2994 nam = m;
2995 m = mbuf_next(m);
2996 if ((error = mbuf_setnext(nam, NULL)))
2997 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
2998 } else
2999 nam = NULL;
3000 nd->nd_nam2 = nam;
3001 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
3002 if (!error)
3003 error = nfsrv_getreq(nd);
3004 if (error) {
3005 if (nam)
3006 mbuf_freem(nam);
3007 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
3008 return (error);
3009 }
3010 nd->nd_mrep = NULL;
3011 *ndp = nd;
3012 nfsd->nfsd_nd = nd;
3013 return (0);
3014 }
3015
3016 /*
3017 * Parse an RPC request
3018 * - verify it
3019 * - fill in the cred struct.
3020 */
3021 static int
3022 nfsrv_getreq(struct nfsrv_descript *nd)
3023 {
3024 struct nfsm_chain *nmreq;
3025 int len, i;
3026 u_long nfsvers, auth_type;
3027 int error = 0;
3028 uid_t user_id;
3029 gid_t group_id;
3030 int ngroups;
3031 struct ucred temp_cred;
3032 uint32_t val;
3033
3034 nd->nd_cr = NULL;
3035 nd->nd_gss_context = NULL;
3036 nd->nd_gss_seqnum = 0;
3037 nd->nd_gss_mb = NULL;
3038
3039 user_id = group_id = -2;
3040 val = auth_type = len = 0;
3041
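/*
 * The RPC call header dissected below is, in 32-bit XDR words:
 *
 *	xid, RPC_CALL, RPC_VER2, program, version, procedure,
 *	credential flavor, credential length (+ credential body),
 *	verifier flavor, verifier length (+ verifier body)
 */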
3042 nmreq = &nd->nd_nmreq;
3043 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
3044 nfsm_chain_get_32(error, nmreq, val); // RPC Call
3045 if (!error && (val != RPC_CALL))
3046 error = EBADRPC;
3047 nfsmout_if(error);
3048 nd->nd_repstat = 0;
3049 nfsm_chain_get_32(error, nmreq, val); // RPC Version
3050 nfsmout_if(error);
3051 if (val != RPC_VER2) {
3052 nd->nd_repstat = ERPCMISMATCH;
3053 nd->nd_procnum = NFSPROC_NOOP;
3054 return (0);
3055 }
3056 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
3057 nfsmout_if(error);
3058 if (val != NFS_PROG) {
3059 nd->nd_repstat = EPROGUNAVAIL;
3060 nd->nd_procnum = NFSPROC_NOOP;
3061 return (0);
3062 }
3063 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
3064 nfsmout_if(error);
3065 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
3066 nd->nd_repstat = EPROGMISMATCH;
3067 nd->nd_procnum = NFSPROC_NOOP;
3068 return (0);
3069 }
3070 nd->nd_vers = nfsvers;
3071 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
3072 nfsmout_if(error);
3073 if ((nd->nd_procnum >= NFS_NPROCS) ||
3074 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
3075 nd->nd_repstat = EPROCUNAVAIL;
3076 nd->nd_procnum = NFSPROC_NOOP;
3077 return (0);
3078 }
3079 if (nfsvers != NFS_VER3)
3080 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
3081 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
3082 nfsm_chain_get_32(error, nmreq, len); // Auth Length
3083 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
3084 error = EBADRPC;
3085 nfsmout_if(error);
3086
3087 /* Handle authentication */
3088 if (auth_type == RPCAUTH_UNIX) {
3089 if (nd->nd_procnum == NFSPROC_NULL)
3090 return (0);
3091 nd->nd_sec = RPCAUTH_UNIX;
3092 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
3093 nfsm_chain_get_32(error, nmreq, len); // hostname length
3094 if (len < 0 || len > NFS_MAXNAMLEN)
3095 error = EBADRPC;
3096 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
3097 nfsmout_if(error);
3098
3099 /* create a temporary credential using the bits from the wire */
3100 bzero(&temp_cred, sizeof(temp_cred));
3101 nfsm_chain_get_32(error, nmreq, user_id);
3102 nfsm_chain_get_32(error, nmreq, group_id);
3103 temp_cred.cr_groups[0] = group_id;
3104 nfsm_chain_get_32(error, nmreq, len); // extra GID count
3105 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
3106 error = EBADRPC;
3107 nfsmout_if(error);
3108 for (i = 1; i <= len; i++)
3109 if (i < NGROUPS)
3110 nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
3111 else
3112 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
3113 nfsmout_if(error);
3114 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
3115 if (ngroups > 1)
3116 nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
3117 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
3118 nfsm_chain_get_32(error, nmreq, len); // verifier length
3119 if (len < 0 || len > RPCAUTH_MAXSIZ)
3120 error = EBADRPC;
3121 if (len > 0)
3122 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
3123
3124 /* request creation of a real credential */
3125 temp_cred.cr_uid = user_id;
3126 temp_cred.cr_ngroups = ngroups;
3127 nd->nd_cr = kauth_cred_create(&temp_cred);
3128 if (nd->nd_cr == NULL) {
3129 nd->nd_repstat = ENOMEM;
3130 nd->nd_procnum = NFSPROC_NOOP;
3131 return (0);
3132 }
3133 } else if (auth_type == RPCSEC_GSS) {
3134 error = nfs_gss_svc_cred_get(nd, nmreq);
3135 if (error) {
3136 if (error == EINVAL)
3137 goto nfsmout; // drop the request
3138 nd->nd_repstat = error;
3139 nd->nd_procnum = NFSPROC_NOOP;
3140 return (0);
3141 }
3142 } else {
3143 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
3144 return (0);
3145 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
3146 nd->nd_procnum = NFSPROC_NOOP;
3147 return (0);
3148 }
3149 return (0);
3150 nfsmout:
3151 if (IS_VALID_CRED(nd->nd_cr))
3152 kauth_cred_unref(&nd->nd_cr);
3153 nfsm_chain_cleanup(nmreq);
3154 return (error);
3155 }
3156
3157 /*
3158 * Search for a sleeping nfsd and wake it up.
3159 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
3160 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
3161 * Note: Must be called with nfsd_mutex held.
3162 */
3163 void
3164 nfsrv_wakenfsd(struct nfsrv_sock *slp)
3165 {
3166 struct nfsd *nd;
3167
3168 if ((slp->ns_flag & SLP_VALID) == 0)
3169 return;
3170
3171 lck_rw_lock_exclusive(&slp->ns_rwlock);
3172 /* if there's work to do on this socket, make sure it's queued up */
3173 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
3174 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
3175 slp->ns_flag |= SLP_WAITQ;
3176 }
3177 lck_rw_done(&slp->ns_rwlock);
3178
3179 /* wake up a waiting nfsd, if possible */
3180 nd = TAILQ_FIRST(&nfsd_queue);
3181 if (!nd)
3182 return;
3183
3184 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
3185 nd->nfsd_flag &= ~NFSD_WAITING;
3186 wakeup(nd);
3187 }
3188
3189 #endif /* NFSSERVER */
3190
3191 static int
3192 nfs_msg(thread_t thd,
3193 const char *server,
3194 const char *msg,
3195 int error)
3196 {
3197 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
3198 tpr_t tpr;
3199
3200 if (p)
3201 tpr = tprintf_open(p);
3202 else
3203 tpr = NULL;
3204 if (error)
3205 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
3206 else
3207 tprintf(tpr, "nfs server %s: %s\n", server, msg);
3208 tprintf_close(tpr);
3209 return (0);
3210 }
3211
3212 void
3213 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
3214 {
3215 int ostate;
3216
3217 if (nmp == NULL)
3218 return;
3219
3220 lck_mtx_lock(&nmp->nm_lock);
3221 ostate = nmp->nm_state;
3222 if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
3223 nmp->nm_state |= NFSSTA_TIMEO;
3224 if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
3225 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3226 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
3227 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
3228 lck_mtx_unlock(&nmp->nm_lock);
3229
3230 if (!(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3231 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
3232
3233 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
3234 }
3235
3236 void
3237 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
3238 {
3239 int ostate, state;
3240
3241 if (nmp == NULL)
3242 return;
3243
3244 if (msg)
3245 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
3246
3247 lck_mtx_lock(&nmp->nm_lock);
3248 ostate = nmp->nm_state;
3249 if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
3250 nmp->nm_state &= ~NFSSTA_TIMEO;
3251 if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
3252 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3253 if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
3254 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
3255 state = nmp->nm_state;
3256 lck_mtx_unlock(&nmp->nm_lock);
3257
3258 if ((ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
3259 !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3260 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
3261 }
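/*
 * Note that nfs_down() and nfs_up() are paired: the first transition
 * into any timeout state posts VQ_NOTRESP with a 0 argument, and the
 * last transition out of all timeout states posts it again with 1 to
 * signal that the server is responding again.
 */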
3262