/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

#if NFSCLIENT

static int	nfs_connect_setup(struct nfsmount *);
static void	nfs_reqdequeue(struct nfsreq *);
static void	nfs_udp_rcv(socket_t, void*, int);
static void	nfs_tcp_rcv(socket_t, void*, int);
static void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void	nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
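
/*
 * The srtt values are kept scaled by 8 and the sdrtt values by 4
 * (see the >>3 and >>2 gains in nfs_request_match_reply()), so the
 * macro above works out to A + 2D for the getattr/lookup timers
 * ((8A/4 + 4D)/2) and A + 4D for the read/write timers (8A/8 + 4D),
 * matching the table in the comment.
 */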

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
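
/*
 * The indices follow the NFSv3 procedure numbers (RFC 1813): GETATTR (1)
 * and ACCESS (4) share the getattr timer, LOOKUP (3) gets the lookup
 * timer, READLINK (5) and READ (6) the read timer, WRITE (7) the write
 * timer, and READDIR (16) / READDIRPLUS (17) reuse the read timer.
 */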

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
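
/*
 * For example: with NFS_CWNDSCALE 256, each outstanding RPC counts 256
 * toward nm_sent, and nm_cwnd ranges from 256 (1 RPC) up to 8192 (32
 * RPCs).  Each reply received against a full window grows nm_cwnd by
 * roughly CWNDSCALE*CWNDSCALE/cwnd -- about one RPC slot per window's
 * worth of replies -- while a retransmit halves it (see nfs_send() and
 * nfs_request_match_reply()).
 */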
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
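
/*
 * nfs_backoff[] supplies the exponential backoff multipliers (capped at
 * 256x) that the request retransmit timer applies to its computed
 * timeout after successive timeouts.
 */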

/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
			nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
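		/* if the port's taken, walk down through the reserved range until a bind succeeds */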
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15)	/* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			if (tocnt > 60) {
				/* abort if this is taking too long */
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			error = msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
			if (error == EWOULDBLOCK)
				error = 0;
			if (error)
				break;
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}

	/*
	 * Set socket send/receive timeouts:
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   non-blocking in the socket upcall.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		nfs_disconnect(nmp);
	return (error);
}

/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}

/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect.  Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
static void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		} else if (force)
			break;
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}

/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}

/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = PSOCK, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			error = msleep(req, &nmp->nm_lock, slpflag, "nfsconnectwait", &ts);
			if (error == EWOULDBLOCK)
				error = 0;
			if ((error == EINTR) || (error == ERESTART))
				break;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				error = msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
				if ((error == EINTR) || (error == ERESTART))
					break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			if ((error == EINTR) || (error == ERESTART))
				return (error);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;
nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) { /* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}

/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		error = msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		if (error == EWOULDBLOCK)
			error = 0;
		if ((error == EINTR) || (error == ERESTART))
			break;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
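
/*
 * A minimal sketch of a synchronous caller (error handling abbreviated;
 * see nfs_request2() below for the real control flow, including the
 * R_RESTART resend loop):
 *
 *	struct nfsreq rq, *req = &rq;
 *	int error, status;
 *
 *	error = nfs_request_create(np, mp, &nmreq, procnum, thd, cred, &req);
 *	do {
 *		req->r_error = 0;
 *		if (!error)
 *			error = nfs_request_add_header(req);
 *		if (!error)
 *			error = nfs_request_send(req, 1);
 *		if (!error)
 *			nfs_request_wait(req);
 *		if (!error)
 *			error = nfs_request_finish(req, &nmrep, &status);
 *	} while (!error && (req->r_flags & R_RESTART));
 *	nfs_request_rele(req);
 */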

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

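	/*
	 * Compute the RPC credential length.  For AUTH_UNIX (AUTH_SYS) the
	 * body is stamp, machinename, uid, gid, and a gid list: the
	 * 5 * NFSX_UNSIGNED below covers those five fixed words (assuming an
	 * empty machinename), plus 4 bytes per supplementary gid, clipped to
	 * nm_numgrps (cr_ngroups - 1 because the first group goes out as the
	 * primary gid).
	 */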
	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
1652
1653 /*
1654 * Finish up an NFS request by dequeueing it and
1655 * doing the initial NFS request reply processing.
1656 */
1657 int
1658 nfs_request_finish(
1659 struct nfsreq *req,
1660 struct nfsm_chain *nmrepp,
1661 int *status)
1662 {
1663 struct nfsmount *nmp;
1664 mbuf_t mrep;
1665 int verf_type = 0;
1666 uint32_t verf_len = 0;
1667 uint32_t reply_status = 0;
1668 uint32_t rejected_status = 0;
1669 uint32_t auth_status = 0;
1670 uint32_t accepted_status = 0;
1671 struct nfsm_chain nmrep;
1672 int error, auth;
1673
1674 error = req->r_error;
1675
1676 if (nmrepp)
1677 nmrepp->nmc_mhead = NULL;
1678
1679 /* RPC done, unlink the request. */
1680 nfs_reqdequeue(req);
1681
1682 mrep = req->r_nmrep.nmc_mhead;
1683
1684 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
1685
1686 /*
1687 * Decrement the outstanding request count.
1688 */
1689 if (req->r_flags & R_CWND) {
1690 req->r_flags &= ~R_CWND;
1691 lck_mtx_lock(&nmp->nm_lock);
1692 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
1693 nmp->nm_sent -= NFS_CWNDSCALE;
1694 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
1695 /* congestion window is open, poke the cwnd queue */
1696 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
1697 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
1698 req2->r_cchain.tqe_next = NFSREQNOLIST;
1699 wakeup(req2);
1700 }
1701 lck_mtx_unlock(&nmp->nm_lock);
1702 }
1703
1704 if (req->r_gss_ctx) { // Using gss cred ?
1705 /*
1706 * If the request had an RPCSEC_GSS credential
1707 * then reset its sequence number bit in the
1708 * request window.
1709 */
1710 nfs_gss_clnt_rpcdone(req);
1711
1712 /*
1713 * If we need to re-send, go back and re-build the
1714 * request based on a new sequence number.
1715 * Note that we're using the original XID.
1716 */
1717 if (error == EAGAIN) {
1718 req->r_error = 0;
1719 if (mrep)
1720 mbuf_freem(mrep);
1721 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
1722 req->r_nmrep.nmc_mhead = NULL;
1723 req->r_flags |= R_RESTART;
1724 if (error == ENEEDAUTH) {
1725 req->r_xid = 0; // get a new XID
1726 error = 0;
1727 }
1728 goto nfsmout;
1729 }
1730 }
1731
1732 /*
1733 * If there was a successful reply, make sure to mark the mount as up.
1734 * If a tprintf message was given (or if this is a timed-out soft mount)
1735 * then post a tprintf message indicating the server is alive again.
1736 */
1737 if (!error) {
1738 if ((req->r_flags & R_TPRINTFMSG) ||
1739 (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
1740 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
1741 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
1742 else
1743 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
1744 }
1745 if (!error && !nmp)
1746 error = ENXIO;
1747 nfsmout_if(error);
1748
1749 /*
1750 * break down the RPC header and check if ok
1751 */
1752 nmrep = req->r_nmrep;
1753 nfsm_chain_get_32(error, &nmrep, reply_status);
1754 nfsmout_if(error);
1755 if (reply_status == RPC_MSGDENIED) {
1756 nfsm_chain_get_32(error, &nmrep, rejected_status);
1757 nfsmout_if(error);
1758 if (rejected_status == RPC_MISMATCH) {
1759 error = ENOTSUP;
1760 goto nfsmout;
1761 }
1762 nfsm_chain_get_32(error, &nmrep, auth_status);
1763 nfsmout_if(error);
1764 switch (auth_status) {
1765 case RPCSEC_GSS_CREDPROBLEM:
1766 case RPCSEC_GSS_CTXPROBLEM:
1767 /*
1768 * An RPCSEC_GSS cred or context problem.
1769 * We can't use it anymore.
1770 * Restore the args, renew the context
1771 * and set up for a resend.
1772 */
1773 error = nfs_gss_clnt_args_restore(req);
1774 if (error && error != ENEEDAUTH)
1775 break;
1776
1777 if (!error) {
1778 error = nfs_gss_clnt_ctx_renew(req);
1779 if (error)
1780 break;
1781 }
1782 mbuf_freem(mrep);
1783 req->r_nmrep.nmc_mhead = NULL;
1784 req->r_xid = 0; // get a new XID
1785 req->r_flags |= R_RESTART;
1786 goto nfsmout;
1787 default:
1788 error = EACCES;
1789 break;
1790 }
1791 goto nfsmout;
1792 }
1793
1794 /* Now check the verifier */
1795 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor
1796 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length
1797 nfsmout_if(error);
1798
1799 auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
1800 switch (auth) {
1801 case RPCAUTH_NULL:
1802 case RPCAUTH_UNIX:
1803 /* Any AUTH_UNIX verifier is ignored */
1804 if (verf_len > 0)
1805 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
1806 nfsm_chain_get_32(error, &nmrep, accepted_status);
1807 break;
1808 case RPCAUTH_KRB5:
1809 case RPCAUTH_KRB5I:
1810 case RPCAUTH_KRB5P:
1811 error = nfs_gss_clnt_verf_get(req, &nmrep,
1812 verf_type, verf_len, &accepted_status);
1813 break;
1814 }
1815 nfsmout_if(error);
1816
1817 switch (accepted_status) {
1818 case RPC_SUCCESS:
1819 if (req->r_procnum == NFSPROC_NULL) {
1820 /*
1821 * The NFS null procedure is unique,
1822 * in not returning an NFS status.
1823 */
1824 *status = NFS_OK;
1825 } else {
1826 nfsm_chain_get_32(error, &nmrep, *status);
1827 nfsmout_if(error);
1828 }
1829
1830 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
1831 /*
1832 * It's a JUKEBOX error - delay and try again
1833 */
1834 int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
1835
1836 mbuf_freem(mrep);
1837 req->r_nmrep.nmc_mhead = NULL;
1838 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
1839 /* we're not yet completely mounted and */
1840 /* we can't complete an RPC, so we fail */
1841 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
1842 nfs_softterm(req);
1843 error = req->r_error;
1844 goto nfsmout;
1845 }
1846 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
1847 if (req->r_delay > 30)
1848 req->r_delay = 30;
1849 if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
1850 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
1851 "resource temporarily unavailable (jukebox)");
1852 req->r_flags |= R_JBTPRINTFMSG;
1853 }
1854 delay = req->r_delay;
1855 if (req->r_callback.rcb_func) {
1856 struct timeval now;
1857 microuptime(&now);
1858 req->r_resendtime = now.tv_sec + delay;
1859 } else {
1860 do {
1861 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
1862 return (error);
1863 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
1864 } while (--delay > 0);
1865 }
1866 req->r_xid = 0; // get a new XID
1867 req->r_flags |= R_RESTART;
1868 req->r_start = 0;
1869 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
1870 return (0);
1871 }
1872
1873 if (req->r_flags & R_JBTPRINTFMSG)
1874 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");
1875
1876 if (*status == NFS_OK) {
1877 /*
1878 * Successful NFS request
1879 */
1880 *nmrepp = nmrep;
1881 req->r_nmrep.nmc_mhead = NULL;
1882 break;
1883 }
1884 /* Got an NFS error of some kind */
1885
1886 /*
1887 * If the File Handle was stale, invalidate the
1888 * lookup cache, just in case.
1889 */
1890 if ((*status == ESTALE) && req->r_np)
1891 cache_purge(NFSTOV(req->r_np));
1892 if (nmp->nm_vers == NFS_VER2)
1893 mbuf_freem(mrep);
1894 else
1895 *nmrepp = nmrep;
1896 req->r_nmrep.nmc_mhead = NULL;
1897 error = 0;
1898 break;
1899 case RPC_PROGUNAVAIL:
1900 error = EPROGUNAVAIL;
1901 break;
1902 case RPC_PROGMISMATCH:
1903 error = ERPCMISMATCH;
1904 break;
1905 case RPC_PROCUNAVAIL:
1906 error = EPROCUNAVAIL;
1907 break;
1908 case RPC_GARBAGE:
1909 error = EBADRPC;
1910 break;
1911 case RPC_SYSTEM_ERR:
1912 default:
1913 error = EIO;
1914 break;
1915 }
1916 nfsmout:
1917 if (!error && (req->r_flags & R_JBTPRINTFMSG))
1918 nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
1919 FSDBG(273, R_XID32(req->r_xid), nmp, req,
1920 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
1921 return (error);
1922 }
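/*
 * Editor's sketch (not part of the original source): the NFSERR_TRYLATER
 * path above doubles r_delay on each "jukebox" reply and caps it at 30
 * seconds. A stand-alone model of that backoff; the initial value here is
 * a hypothetical stand-in for NFS_TRYLATERDEL.
 */
#if 0
#include <stdio.h>

#define EXAMPLE_TRYLATERDEL	4	/* assumed initial delay, in seconds */

int
main(void)
{
	int delay = 0, attempt;

	for (attempt = 1; attempt <= 6; attempt++) {
		delay = !delay ? EXAMPLE_TRYLATERDEL : (delay * 2);
		if (delay > 30)
			delay = 30;
		printf("TRYLATER reply %d: wait %d seconds\n", attempt, delay);
	}
	return (0);
}
#endif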
1923
1924
1925 /*
1926 * Perform an NFS request synchronously.
1927 */
1928
1929 int
1930 nfs_request(
1931 nfsnode_t np,
1932 mount_t mp, /* used only if !np */
1933 struct nfsm_chain *nmrest,
1934 int procnum,
1935 vfs_context_t ctx,
1936 struct nfsm_chain *nmrepp,
1937 u_int64_t *xidp,
1938 int *status)
1939 {
1940 return nfs_request2(np, mp, nmrest, procnum,
1941 vfs_context_thread(ctx), vfs_context_ucred(ctx),
1942 0, nmrepp, xidp, status);
1943 }
1944
1945 int
1946 nfs_request2(
1947 nfsnode_t np,
1948 mount_t mp, /* used only if !np */
1949 struct nfsm_chain *nmrest,
1950 int procnum,
1951 thread_t thd,
1952 kauth_cred_t cred,
1953 int flags,
1954 struct nfsm_chain *nmrepp,
1955 u_int64_t *xidp,
1956 int *status)
1957 {
1958 struct nfsreq rq, *req = &rq;
1959 int error;
1960
1961 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
1962 return (error);
1963 req->r_flags |= (flags & R_OPTMASK);
1964
1965 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
1966 do {
1967 req->r_error = 0;
1968 req->r_flags &= ~R_RESTART;
1969 if ((error = nfs_request_add_header(req)))
1970 break;
1971 if (xidp)
1972 *xidp = req->r_xid;
1973 if ((error = nfs_request_send(req, 1)))
1974 break;
1975 nfs_request_wait(req);
1976 if ((error = nfs_request_finish(req, nmrepp, status)))
1977 break;
1978 } while (req->r_flags & R_RESTART);
1979
1980 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
1981 nfs_request_rele(req);
1982 return (error);
1983 }
1984
1985 /*
1986 * Create and start an asynchronous NFS request.
1987 */
1988 int
1989 nfs_request_async(
1990 nfsnode_t np,
1991 mount_t mp, /* used only if !np */
1992 struct nfsm_chain *nmrest,
1993 int procnum,
1994 thread_t thd,
1995 kauth_cred_t cred,
1996 struct nfsreq_cbinfo *cb,
1997 struct nfsreq **reqp)
1998 {
1999 struct nfsreq *req;
2000 int error, sent;
2001
2002 error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
2003 req = *reqp;
2004 FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
2005 if (error)
2006 return (error);
2007 req->r_flags |= R_ASYNC;
2008 if (cb)
2009 req->r_callback = *cb;
2010 error = nfs_request_add_header(req);
2011 if (!error) {
2012 req->r_flags |= R_WAITSENT;
2013 if (req->r_callback.rcb_func)
2014 nfs_request_ref(req, 0);
2015 error = nfs_request_send(req, 1);
2016 lck_mtx_lock(&req->r_mtx);
2017 if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
2018 /* make sure to wait until this async I/O request gets sent */
2019 int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
2020 struct timespec ts = { 2, 0 };
2021 while (!error && !(req->r_flags & R_SENT)) {
2022 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
2023 break;
2024 error = msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
2025 if (error == EWOULDBLOCK)
2026 error = 0;
2027 }
2028 }
2029 sent = req->r_flags & R_SENT;
2030 lck_mtx_unlock(&req->r_mtx);
2031 if (error && req->r_callback.rcb_func && !sent)
2032 nfs_request_rele(req);
2033 }
2034 FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
2035 if (error || req->r_callback.rcb_func)
2036 nfs_request_rele(req);
2037 return (error);
2038 }
2039
2040 /*
2041 * Wait for and finish an asynchronous NFS request.
2042 */
2043 int
2044 nfs_request_async_finish(
2045 struct nfsreq *req,
2046 struct nfsm_chain *nmrepp,
2047 u_int64_t *xidp,
2048 int *status)
2049 {
2050 int error, asyncio = req->r_callback.rcb_func ? 1 : 0;
2051
2052 lck_mtx_lock(&req->r_mtx);
2053 if (!asyncio)
2054 req->r_flags |= R_ASYNCWAIT;
2055 while (req->r_flags & R_RESENDQ) /* wait until the request is off the resend queue */
2056 msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", NULL);
2057 lck_mtx_unlock(&req->r_mtx);
2058
2059 nfs_request_wait(req);
2060 error = nfs_request_finish(req, nmrepp, status);
2061
2062 while (!error && (req->r_flags & R_RESTART)) {
2063 if (asyncio && req->r_resendtime) { /* send later */
2064 lck_mtx_lock(&req->r_mtx);
2065 nfs_asyncio_resend(req);
2066 lck_mtx_unlock(&req->r_mtx);
2067 return (EINPROGRESS);
2068 }
2069 req->r_error = 0;
2070 req->r_flags &= ~R_RESTART;
2071 if ((error = nfs_request_add_header(req)))
2072 break;
2073 if ((error = nfs_request_send(req, !asyncio)))
2074 break;
2075 if (asyncio)
2076 return (EINPROGRESS);
2077 nfs_request_wait(req);
2078 if ((error = nfs_request_finish(req, nmrepp, status)))
2079 break;
2080 }
2081 if (xidp)
2082 *xidp = req->r_xid;
2083
2084 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
2085 nfs_request_rele(req);
2086 return (error);
2087 }
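/*
 * Editor's sketch (not part of the original source): the intended shape of
 * an asynchronous caller, assuming a completion callback with the rcb_func
 * signature used by the client code above. 'example_iodone' is
 * hypothetical; an EINPROGRESS return means the request was requeued for a
 * restart/resend and the callback will fire again later.
 */
#if 0
static void
example_iodone(struct nfsreq *req)
{
	struct nfsm_chain nmrep;
	u_int64_t xid;
	int error, status;

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS)
		return;		/* requeued; we will be called back again */
	/* ... dissect the reply in nmrep, then clean it up ... */
}
#endif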
2088
2089 /*
2090 * Cancel a pending asynchronous NFS request.
2091 */
2092 void
2093 nfs_request_async_cancel(struct nfsreq *req)
2094 {
2095 nfs_reqdequeue(req);
2096 FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
2097 nfs_request_rele(req);
2098 }
2099
2100 /*
2101 * Flag a request as being terminated.
2102 */
2103 static void
2104 nfs_softterm(struct nfsreq *req)
2105 {
2106 struct nfsmount *nmp = req->r_nmp;
2107 req->r_flags |= R_SOFTTERM;
2108 req->r_error = ETIMEDOUT;
2109 if (!(req->r_flags & R_CWND) || !nmp)
2110 return;
2111 /* update congestion window */
2112 req->r_flags &= ~R_CWND;
2113 lck_mtx_lock(&nmp->nm_lock);
2114 FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
2115 nmp->nm_sent -= NFS_CWNDSCALE;
2116 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
2117 /* congestion window is open, poke the cwnd queue */
2118 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
2119 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
2120 req2->r_cchain.tqe_next = NFSREQNOLIST;
2121 wakeup(req2);
2122 }
2123 lck_mtx_unlock(&nmp->nm_lock);
2124 }
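/*
 * Editor's sketch (not part of the original source): a minimal model of
 * the congestion-window accounting used by nfs_softterm() above. Each
 * in-flight request holds CWNDSCALE units of nm_sent; when a request
 * leaves the window and nm_sent drops below nm_cwnd, the first waiter on
 * the cwnd queue is admitted. All names and values here are hypothetical.
 */
#if 0
#define EXAMPLE_CWNDSCALE	256	/* assumed per-request window cost */

struct example_cwnd {
	int sent;	/* units currently in flight */
	int cwnd;	/* current window size */
	int waiters;	/* requests parked waiting for the window */
};

/* a request leaves the window; returns 1 if a waiter may now send */
static int
example_cwnd_release(struct example_cwnd *cw)
{
	cw->sent -= EXAMPLE_CWNDSCALE;
	if ((cw->sent < cw->cwnd) && (cw->waiters > 0)) {
		cw->waiters--;		/* wakeup() the first queued request */
		return (1);
	}
	return (0);
}
#endif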
2125
2126 /*
2127 * Ensure req isn't in use by the timer, then dequeue it.
2128 */
2129 static void
2130 nfs_reqdequeue(struct nfsreq *req)
2131 {
2132 lck_mtx_lock(nfs_request_mutex);
2133 while (req->r_lflags & RL_BUSY) {
2134 req->r_lflags |= RL_WAITING;
2135 msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
2136 }
2137 if (req->r_lflags & RL_QUEUED) {
2138 TAILQ_REMOVE(&nfs_reqq, req, r_chain);
2139 req->r_lflags &= ~RL_QUEUED;
2140 }
2141 lck_mtx_unlock(nfs_request_mutex);
2142 }
2143
2144 /*
2145 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
2146 * free()'d out from under it.
2147 */
2148 static void
2149 nfs_reqbusy(struct nfsreq *req)
2150 {
2151 if (req->r_lflags & RL_BUSY)
2152 panic("req locked");
2153 req->r_lflags |= RL_BUSY;
2154 }
2155
2156 /*
2157 * Unbusy the given nfsreq and return the next nfsreq in the chain, busied.
2158 */
2159 static struct nfsreq *
2160 nfs_reqnext(struct nfsreq *req)
2161 {
2162 struct nfsreq * nextreq;
2163
2164 if (req == NULL)
2165 return (NULL);
2166 /*
2167 * We need to get and busy the next req before signalling the
2168 * current one, otherwise wakeup() may block us and we'll race to
2169 * grab the next req.
2170 */
2171 nextreq = TAILQ_NEXT(req, r_chain);
2172 if (nextreq != NULL)
2173 nfs_reqbusy(nextreq);
2174 /* unbusy and signal. */
2175 req->r_lflags &= ~RL_BUSY;
2176 if (req->r_lflags & RL_WAITING) {
2177 req->r_lflags &= ~RL_WAITING;
2178 wakeup(&req->r_lflags);
2179 }
2180 return (nextreq);
2181 }
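/*
 * Editor's sketch (not part of the original source): nfs_reqbusy() and
 * nfs_reqnext() give the timer a hand-over-hand walk of the request queue:
 * the successor is busied before the current entry is released, so the
 * entry being examined can never be freed out from under the timer. A
 * simplified model; locking and the RL_WAITING wakeups are elided (the
 * real code holds nfs_request_mutex throughout).
 */
#if 0
struct example_node {
	struct example_node *next;
	int busy;			/* stand-in for RL_BUSY */
};

/* walk the list, with the head already busied by the caller */
static void
example_walk(struct example_node *head)
{
	struct example_node *n, *next;

	for (n = head; n != NULL; n = next) {
		/* n is busied here; safe to examine */
		next = n->next;
		if (next != NULL)
			next->busy = 1;	/* busy the successor first */
		n->busy = 0;		/* only then release the current one */
	}
}
#endif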
2182
2183 /*
2184 * NFS request queue timer routine
2185 *
2186 * Scan the NFS request queue for any requests that have timed out.
2187 *
2188 * Alert the system of unresponsive servers.
2189 * Mark expired requests on soft mounts as terminated.
2190 * For UDP, mark/signal requests for retransmission.
2191 */
2192 void
2193 nfs_request_timer(__unused void *param0, __unused void *param1)
2194 {
2195 struct nfsreq *req;
2196 struct nfsmount *nmp;
2197 int timeo, maxtime, finish_asyncio, error;
2198 struct timeval now;
2199 TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
2200
2201 lck_mtx_lock(nfs_request_mutex);
2202 req = TAILQ_FIRST(&nfs_reqq);
2203 if (req == NULL) { /* no requests - turn timer off */
2204 nfs_request_timer_on = 0;
2205 lck_mtx_unlock(nfs_request_mutex);
2206 return;
2207 }
2208
2209 nfs_reqbusy(req);
2210 TAILQ_INIT(&nfs_mount_poke_queue);
2211
2212 microuptime(&now);
2213 for ( ; req != NULL ; req = nfs_reqnext(req)) {
2214 nmp = req->r_nmp;
2215 if (!nmp) /* unmounted */
2216 continue;
2217 if (req->r_error || req->r_nmrep.nmc_mhead)
2218 continue;
2219 if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
2220 if (req->r_callback.rcb_func != NULL) {
2221 /* async I/O RPC needs to be finished */
2222 lck_mtx_lock(&req->r_mtx);
2223 req->r_error = error;
2224 finish_asyncio = !(req->r_flags & R_WAITSENT);
2225 wakeup(req);
2226 lck_mtx_unlock(&req->r_mtx);
2227 if (finish_asyncio)
2228 nfs_asyncio_finish(req);
2229 }
2230 continue;
2231 }
2232
2233 lck_mtx_lock(&req->r_mtx);
2234
2235 if (nmp->nm_tprintf_initial_delay &&
2236 ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
2237 ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
2238 req->r_lastmsg = now.tv_sec;
2239 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2240 "not responding");
2241 req->r_flags |= R_TPRINTFMSG;
2242 lck_mtx_lock(&nmp->nm_lock);
2243 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2244 lck_mtx_unlock(&nmp->nm_lock);
2245 /* we're not yet completely mounted and */
2246 /* we can't complete an RPC, so we fail */
2247 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2248 nfs_softterm(req);
2249 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2250 wakeup(req);
2251 lck_mtx_unlock(&req->r_mtx);
2252 if (finish_asyncio)
2253 nfs_asyncio_finish(req);
2254 continue;
2255 }
2256 lck_mtx_unlock(&nmp->nm_lock);
2257 }
2258
2259 /*
2260 * Put a reasonable limit on the maximum timeout,
2261 * and reduce that limit when soft mounts get timeouts or are in reconnect.
2262 */
2263 if (!(nmp->nm_flag & NFSMNT_SOFT))
2264 maxtime = NFS_MAXTIMEO;
2265 else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
2266 maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
2267 else
2268 maxtime = NFS_MINTIMEO/4;
2269
2270 /*
2271 * Check for request timeout.
2272 */
2273 if (req->r_rtt >= 0) {
2274 req->r_rtt++;
2275 lck_mtx_lock(&nmp->nm_lock);
2276 if (req->r_flags & R_RESENDERR) {
2277 /* with resend errors, retry every few seconds */
2278 timeo = 4*hz;
2279 } else {
2280 if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
2281 timeo = NFS_MINIDEMTIMEO; // gss context setup
2282 else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2283 timeo = nmp->nm_timeo;
2284 else
2285 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
2286
2287 /* ensure 62.5 ms floor */
2288 while (16 * timeo < hz)
2289 timeo *= 2;
2290 if (nmp->nm_timeouts > 0)
2291 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2292 }
2293 /* limit timeout to max */
2294 if (timeo > maxtime)
2295 timeo = maxtime;
2296 if (req->r_rtt <= timeo) {
2297 lck_mtx_unlock(&nmp->nm_lock);
2298 lck_mtx_unlock(&req->r_mtx);
2299 continue;
2300 }
2301 /* The request has timed out */
2302 NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
2303 req->r_procnum, proct[req->r_procnum],
2304 req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
2305 (now.tv_sec - req->r_start)*NFS_HZ, maxtime));
2306 if (nmp->nm_timeouts < 8)
2307 nmp->nm_timeouts++;
2308 /* if it's been a few seconds, try poking the socket */
2309 if ((nmp->nm_sotype == SOCK_STREAM) &&
2310 ((now.tv_sec - req->r_start) >= 3) &&
2311 !(nmp->nm_sockflags & NMSOCK_POKE)) {
2312 nmp->nm_sockflags |= NMSOCK_POKE;
2313 TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
2314 }
2315 lck_mtx_unlock(&nmp->nm_lock);
2316 }
2317
2318 /* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
2319 if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
2320 ((req->r_rexmit >= req->r_retry) || /* too many */
2321 ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
2322 OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
2323 lck_mtx_lock(&nmp->nm_lock);
2324 if (!(nmp->nm_state & NFSSTA_TIMEO)) {
2325 lck_mtx_unlock(&nmp->nm_lock);
2326 /* make sure we note the unresponsive server */
2327 /* (maxtime may be less than tprintf delay) */
2328 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
2329 "not responding");
2330 req->r_lastmsg = now.tv_sec;
2331 req->r_flags |= R_TPRINTFMSG;
2332 } else {
2333 lck_mtx_unlock(&nmp->nm_lock);
2334 }
2335 NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
2336 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
2337 now.tv_sec - req->r_start));
2338 nfs_softterm(req);
2339 finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
2340 wakeup(req);
2341 lck_mtx_unlock(&req->r_mtx);
2342 if (finish_asyncio)
2343 nfs_asyncio_finish(req);
2344 continue;
2345 }
2346
2347 /* for TCP, only resend if explicitly requested */
2348 if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
2349 if (++req->r_rexmit > NFS_MAXREXMIT)
2350 req->r_rexmit = NFS_MAXREXMIT;
2351 req->r_rtt = 0;
2352 lck_mtx_unlock(&req->r_mtx);
2353 continue;
2354 }
2355
2356 /*
2357 * The request needs to be (re)sent. Kick the requester to resend it
2358 * (unless it's already marked as needing a resend).
2359 */
2360 if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
2361 lck_mtx_unlock(&req->r_mtx);
2362 continue;
2363 }
2364 NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
2365 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
2366 req->r_flags |= R_MUSTRESEND;
2367 req->r_rtt = -1;
2368 wakeup(req);
2369 if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
2370 nfs_asyncio_resend(req);
2371 lck_mtx_unlock(&req->r_mtx);
2372 }
2373
2374 lck_mtx_unlock(nfs_request_mutex);
2375
2376 /* poke any sockets */
2377 while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
2378 TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
2379 nfs_sock_poke(nmp);
2380 lck_mtx_lock(&nmp->nm_lock);
2381 nmp->nm_sockflags &= ~NMSOCK_POKE;
2382 if (!(nmp->nm_state & NFSSTA_MOUNTED))
2383 wakeup(&nmp->nm_sockflags);
2384 lck_mtx_unlock(&nmp->nm_lock);
2385 }
2386
2387 nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
2388 }
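/*
 * Editor's sketch (not part of the original source): the per-request
 * timeout test in nfs_request_timer() above, in stand-alone form. Start
 * from a base RTO, double it up to the hz/16 (62.5 ms) floor, scale by the
 * mount's backoff factor, clamp to maxtime; the request has timed out once
 * its rtt exceeds the result. The backoff table values are hypothetical.
 */
#if 0
static const int example_backoff[8] = { 2, 4, 8, 16, 32, 64, 64, 64 };

static int
example_timed_out(int rtt, int base_rto, int hz, int timeouts, int maxtime)
{
	int timeo = base_rto;

	while (16 * timeo < hz)		/* enforce the 62.5 ms floor */
		timeo *= 2;
	if (timeouts > 0)		/* back off after earlier timeouts */
		timeo *= example_backoff[timeouts - 1];
	if (timeo > maxtime)		/* never exceed the mount's cap */
		timeo = maxtime;
	return (rtt > timeo);
}
#endif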
2389
2390 /*
2391 * Check a thread's proc for the "noremotehang" flag.
2392 */
2393 int
2394 nfs_noremotehang(thread_t thd)
2395 {
2396 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
2397 return (p && proc_noremotehang(p));
2398 }
2399
2400 /*
2401 * Test for a termination condition pending on the process.
2402 * This is used to determine if we need to bail on a mount.
2403 * ETIMEDOUT is returned if there has been a soft timeout.
2404 * EINTR is returned if there is a signal pending that is not being ignored
2405 * and the mount is interruptible, or if this thread is being cancelled
2406 * (in which case a SIGKILL has also been posted).
2407 */
2408 int
2409 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
2410 {
2411 int error = 0;
2412
2413 if (nmp == NULL)
2414 return (ENXIO);
2415
2416 if (req && (req->r_flags & R_SOFTTERM))
2417 return (ETIMEDOUT); /* request has been terminated. */
2418
2419 /*
2420 * If we're in the process of a force unmount and there's
2421 * been a timeout, we're dead, so fail the I/O.
2422 */
2423 if (!nmplocked)
2424 lck_mtx_lock(&nmp->nm_lock);
2425 if ((nmp->nm_state & NFSSTA_FORCE) &&
2426 (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
2427 error = EIO;
2428 } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
2429 /* Someone is unmounting us, go soft and mark it. */
2430 nmp->nm_flag |= NFSMNT_SOFT;
2431 nmp->nm_state |= NFSSTA_FORCE;
2432 }
2433
2434 /*
2435 * If the mount is hung and we've requested not to hang
2436 * on remote filesystems, then bail now.
2437 */
2438 if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
2439 error = EIO;
2440
2441 if (!nmplocked)
2442 lck_mtx_unlock(&nmp->nm_lock);
2443 if (error)
2444 return (error);
2445
2446 /* may not have a thread for async I/O */
2447 if (thd == NULL)
2448 return (0);
2449
2450 /* If this thread belongs to the kernel task, the abort check is not needed */
2451 if ((current_proc() != kernproc) && current_thread_aborted())
2452 return (EINTR);
2453
2454 /* mask off thread and process blocked signals. */
2455 if ((nmp->nm_flag & NFSMNT_INT) &&
2456 proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
2457 return (EINTR);
2458 return (0);
2459 }
2460
2461 /*
2462 * Lock a socket against others.
2463 * Necessary for STREAM sockets to ensure we get an entire RPC request/reply,
2464 * and also to avoid race conditions among processes with NFS requests
2465 * in progress when a reconnect is necessary.
2466 */
2467 int
2468 nfs_sndlock(struct nfsreq *req)
2469 {
2470 struct nfsmount *nmp = req->r_nmp;
2471 int *statep;
2472 int error = 0, slpflag = 0;
2473 struct timespec ts = { 0, 0 };
2474
2475 if (nmp == NULL)
2476 return (ENXIO);
2477
2478 lck_mtx_lock(&nmp->nm_lock);
2479 statep = &nmp->nm_state;
2480
2481 if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
2482 slpflag = PCATCH;
2483 while (!error && (*statep & NFSSTA_SNDLOCK)) {
2484 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
2485 break;
2486 *statep |= NFSSTA_WANTSND;
2487 if (nfs_noremotehang(req->r_thread))
2488 ts.tv_sec = 1;
2489 error = msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
2490 if (error == EWOULDBLOCK)
2491 error = 0;
2492 if (slpflag == PCATCH) {
2493 slpflag = 0;
2494 ts.tv_sec = 2;
2495 }
2496 }
2497 if (!error)
2498 *statep |= NFSSTA_SNDLOCK;
2499 lck_mtx_unlock(&nmp->nm_lock);
2500 return (error);
2501 }
2502
2503 /*
2504 * Unlock the stream socket for others.
2505 */
2506 void
2507 nfs_sndunlock(struct nfsreq *req)
2508 {
2509 struct nfsmount *nmp = req->r_nmp;
2510 int *statep, wake = 0;
2511
2512 if (nmp == NULL)
2513 return;
2514 lck_mtx_lock(&nmp->nm_lock);
2515 statep = &nmp->nm_state;
2516 if ((*statep & NFSSTA_SNDLOCK) == 0)
2517 panic("nfs sndunlock");
2518 *statep &= ~NFSSTA_SNDLOCK;
2519 if (*statep & NFSSTA_WANTSND) {
2520 *statep &= ~NFSSTA_WANTSND;
2521 wake = 1;
2522 }
2523 lck_mtx_unlock(&nmp->nm_lock);
2524 if (wake)
2525 wakeup(statep);
2526 }
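/*
 * Editor's sketch (not part of the original source): nfs_sndlock() and
 * nfs_sndunlock() above form a classic flag-based sleep lock. The same
 * shape in portable user-space code, with pthreads standing in for
 * msleep()/wakeup(); all names here are hypothetical.
 */
#if 0
#include <pthread.h>

#define EX_LOCKED	0x01	/* stand-in for NFSSTA_SNDLOCK */
#define EX_WANTED	0x02	/* stand-in for NFSSTA_WANTSND */

struct ex_sndlock {
	pthread_mutex_t mtx;
	pthread_cond_t cv;
	int state;
};

static void
ex_sndlock(struct ex_sndlock *l)
{
	pthread_mutex_lock(&l->mtx);
	while (l->state & EX_LOCKED) {
		l->state |= EX_WANTED;		/* note that we want it */
		pthread_cond_wait(&l->cv, &l->mtx);
	}
	l->state |= EX_LOCKED;
	pthread_mutex_unlock(&l->mtx);
}

static void
ex_sndunlock(struct ex_sndlock *l)
{
	pthread_mutex_lock(&l->mtx);
	l->state &= ~EX_LOCKED;
	if (l->state & EX_WANTED) {
		l->state &= ~EX_WANTED;
		pthread_cond_broadcast(&l->cv);	/* wakeup() the waiters */
	}
	pthread_mutex_unlock(&l->mtx);
}
#endif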
2527
2528 #endif /* NFSCLIENT */
2529
2530 #if NFSSERVER
2531
2532 /*
2533 * Generate the RPC reply header.
2534 * The siz arg is used to decide whether adding a cluster is worthwhile.
2535 */
2536 int
2537 nfsrv_rephead(
2538 struct nfsrv_descript *nd,
2539 __unused struct nfsrv_sock *slp,
2540 struct nfsm_chain *nmrepp,
2541 size_t siz)
2542 {
2543 mbuf_t mrep;
2544 u_long *tl;
2545 struct nfsm_chain nmrep;
2546 int err, error;
2547
2548 err = nd->nd_repstat;
2549 if (err && (nd->nd_vers == NFS_VER2))
2550 siz = 0;
2551
2552 /*
2553 * If this is a big reply, use a cluster; otherwise
2554 * try to leave leading space for the lower-level headers.
2555 */
2556 siz += RPC_REPLYSIZ;
2557 if (siz >= nfs_mbuf_minclsize) {
2558 error = mbuf_getpacket(MBUF_WAITOK, &mrep);
2559 } else {
2560 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
2561 }
2562 if (error) {
2563 /* unable to allocate packet */
2564 /* XXX should we keep statistics for these errors? */
2565 return (error);
2566 }
2567 if (siz < nfs_mbuf_minclsize) {
2568 /* leave space for lower level headers */
2569 tl = mbuf_data(mrep);
2570 tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
2571 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
2572 }
2573 nfsm_chain_init(&nmrep, mrep);
2574 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
2575 nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
2576 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
2577 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
2578 if (err & NFSERR_AUTHERR) {
2579 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
2580 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
2581 } else {
2582 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
2583 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2584 nfsm_chain_add_32(error, &nmrep, RPC_VER2);
2585 }
2586 } else {
2587 /* reply status */
2588 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
2589 if (nd->nd_gss_context != NULL) {
2590 /* RPCSEC_GSS verifier */
2591 error = nfs_gss_svc_verf_put(nd, &nmrep);
2592 if (error) {
2593 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
2594 goto done;
2595 }
2596 } else {
2597 /* RPCAUTH_NULL verifier */
2598 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
2599 nfsm_chain_add_32(error, &nmrep, 0);
2600 }
2601 /* accepted status */
2602 switch (err) {
2603 case EPROGUNAVAIL:
2604 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
2605 break;
2606 case EPROGMISMATCH:
2607 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
2608 /* XXX hard coded versions? */
2609 nfsm_chain_add_32(error, &nmrep, NFS_VER2);
2610 nfsm_chain_add_32(error, &nmrep, NFS_VER3);
2611 break;
2612 case EPROCUNAVAIL:
2613 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
2614 break;
2615 case EBADRPC:
2616 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
2617 break;
2618 default:
2619 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
2620 if (nd->nd_gss_context != NULL)
2621 error = nfs_gss_svc_prepare_reply(nd, &nmrep);
2622 if (err != NFSERR_RETVOID)
2623 nfsm_chain_add_32(error, &nmrep,
2624 (err ? nfsrv_errmap(nd, err) : 0));
2625 break;
2626 }
2627 }
2628
2629 done:
2630 nfsm_chain_build_done(error, &nmrep);
2631 if (error) {
2632 /* error composing reply header */
2633 /* XXX should we keep statistics for these errors? */
2634 mbuf_freem(mrep);
2635 return (error);
2636 }
2637
2638 *nmrepp = nmrep;
2639 if ((err != 0) && (err != NFSERR_RETVOID))
2640 OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
2641 return (0);
2642 }
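/*
 * Editor's sketch (not part of the original source): for the common
 * MSG_ACCEPTED / RPCAUTH_NULL case, the header built above boils down to
 * six big-endian 32-bit words (RFC 1831):
 *
 *   xid | REPLY(1) | MSG_ACCEPTED(0) | verf flavor(0) | verf len(0) | stat
 *
 * A stand-alone builder; 'example_reply_header' is hypothetical.
 */
#if 0
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

static size_t
example_reply_header(uint32_t buf[6], uint32_t xid, uint32_t accept_stat)
{
	size_t i = 0;

	buf[i++] = htonl(xid);		/* transaction ID from the call */
	buf[i++] = htonl(1);		/* msg_type = REPLY */
	buf[i++] = htonl(0);		/* reply_stat = MSG_ACCEPTED */
	buf[i++] = htonl(0);		/* verifier flavor = AUTH_NONE */
	buf[i++] = htonl(0);		/* verifier length = 0 */
	buf[i++] = htonl(accept_stat);	/* accept_stat, 0 = SUCCESS */
	return (i * sizeof(uint32_t));
}
#endif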
2643
2644 /*
2645 * The nfs server send routine.
2646 *
2647 * - return EINTR or ERESTART if interrupted by a signal
2648 * - return EPIPE if a connection is lost for connection-based sockets (TCP...)
2649 * - do any cleanup required by recoverable socket errors (???)
2650 */
2651 int
2652 nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
2653 {
2654 int error;
2655 socket_t so = slp->ns_so;
2656 struct sockaddr *sendnam;
2657 struct msghdr msg;
2658
2659 bzero(&msg, sizeof(msg));
2660 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
2661 if ((sendnam = mbuf_data(nam))) {
2662 msg.msg_name = (caddr_t)sendnam;
2663 msg.msg_namelen = sendnam->sa_len;
2664 }
2665 }
2666 error = sock_sendmbuf(so, &msg, top, 0, NULL);
2667 if (!error)
2668 return (0);
2669 log(LOG_INFO, "nfsd send error %d\n", error);
2670
2671 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
2672 error = EPIPE; /* zap TCP sockets if they time out on send */
2673
2674 /* Handle any recoverable (soft) socket errors here. (???) */
2675 if (error != EINTR && error != ERESTART && error != EIO &&
2676 error != EWOULDBLOCK && error != EPIPE)
2677 error = 0;
2678
2679 return (error);
2680 }
2681
2682 /*
2683 * Socket upcall routine for the nfsd sockets.
2684 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
2685 * Essentially, do as much as possible without blocking; otherwise punt, and
2686 * this routine will be called again with MBUF_WAITOK from an nfsd.
2687 */
2688 void
2689 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2690 {
2691 struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;
2692
2693 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
2694 return;
2695
2696 lck_rw_lock_exclusive(&slp->ns_rwlock);
2697 nfsrv_rcv_locked(so, slp, waitflag);
2698 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2699 }
2700 void
2701 nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
2702 {
2703 mbuf_t m, mp, mhck, m2;
2704 int ns_flag=0, error;
2705 struct msghdr msg;
2706 size_t bytes_read;
2707
2708 if ((slp->ns_flag & SLP_VALID) == 0) {
2709 if (waitflag == MBUF_DONTWAIT)
2710 lck_rw_done(&slp->ns_rwlock);
2711 return;
2712 }
2713
2714 #ifdef notdef
2715 /*
2716 * Define this to test for nfsds handling this under heavy load.
2717 */
2718 if (waitflag == MBUF_DONTWAIT) {
2719 ns_flag = SLP_NEEDQ;
2720 goto dorecs;
2721 }
2722 #endif
2723 if (slp->ns_sotype == SOCK_STREAM) {
2724 /*
2725 * If there are already records on the queue, defer soreceive()
2726 * to an nfsd so that there is feedback to the TCP layer that
2727 * the nfs servers are heavily loaded.
2728 */
2729 if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
2730 ns_flag = SLP_NEEDQ;
2731 goto dorecs;
2732 }
2733
2734 /*
2735 * Do soreceive().
2736 */
2737 bytes_read = 1000000000;
2738 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
2739 if (error || mp == NULL) {
2740 if (error == EWOULDBLOCK)
2741 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
2742 else
2743 ns_flag = SLP_DISCONN;
2744 goto dorecs;
2745 }
2746 m = mp;
2747 if (slp->ns_rawend) {
2748 if ((error = mbuf_setnext(slp->ns_rawend, m)))
2749 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
2750 slp->ns_cc += bytes_read;
2751 } else {
2752 slp->ns_raw = m;
2753 slp->ns_cc = bytes_read;
2754 }
2755 while ((m2 = mbuf_next(m)))
2756 m = m2;
2757 slp->ns_rawend = m;
2758
2759 /*
2760 * Now try and parse record(s) out of the raw stream data.
2761 */
2762 error = nfsrv_getstream(slp, waitflag);
2763 if (error) {
2764 if (error == EPERM)
2765 ns_flag = SLP_DISCONN;
2766 else
2767 ns_flag = SLP_NEEDQ;
2768 }
2769 } else {
2770 struct sockaddr_storage nam;
2771
2772 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
2773 /* already have max # RPC records queued on this socket */
2774 ns_flag = SLP_NEEDQ;
2775 goto dorecs;
2776 }
2777
2778 bzero(&msg, sizeof(msg));
2779 msg.msg_name = (caddr_t)&nam;
2780 msg.msg_namelen = sizeof(nam);
2781
2782 do {
2783 bytes_read = 1000000000;
2784 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
2785 if (mp) {
2786 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
2787 mbuf_setlen(mhck, nam.ss_len);
2788 bcopy(&nam, mbuf_data(mhck), nam.ss_len);
2789 m = mhck;
2790 if (mbuf_setnext(m, mp)) {
2791 /* trouble... just drop it */
2792 printf("nfsrv_rcv: mbuf_setnext failed\n");
2793 mbuf_free(mhck);
2794 m = mp;
2795 }
2796 } else {
2797 m = mp;
2798 }
2799 if (slp->ns_recend)
2800 mbuf_setnextpkt(slp->ns_recend, m);
2801 else {
2802 slp->ns_rec = m;
2803 slp->ns_flag |= SLP_DOREC;
2804 }
2805 slp->ns_recend = m;
2806 mbuf_setnextpkt(m, NULL);
2807 slp->ns_reccnt++;
2808 }
2809 } while (mp);
2810 }
2811
2812 /*
2813 * Now try and process the request records, non-blocking.
2814 */
2815 dorecs:
2816 if (ns_flag)
2817 slp->ns_flag |= ns_flag;
2818 if (waitflag == MBUF_DONTWAIT) {
2819 int wake = (slp->ns_flag & SLP_WORKTODO);
2820 lck_rw_done(&slp->ns_rwlock);
2821 if (wake && nfsd_thread_count) {
2822 lck_mtx_lock(nfsd_mutex);
2823 nfsrv_wakenfsd(slp);
2824 lck_mtx_unlock(nfsd_mutex);
2825 }
2826 }
2827 }
2828
2829 /*
2830 * Try and extract an RPC request from the mbuf data list received on a
2831 * stream socket. The "waitflag" argument indicates whether or not it
2832 * can sleep.
2833 */
2834 static int
2835 nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
2836 {
2837 mbuf_t m;
2838 char *cp1, *cp2, *mdata;
2839 int len, mlen, error;
2840 mbuf_t om, m2, recm;
2841 u_long recmark;
2842
2843 if (slp->ns_flag & SLP_GETSTREAM)
2844 panic("nfs getstream");
2845 slp->ns_flag |= SLP_GETSTREAM;
2846 for (;;) {
2847 if (slp->ns_reclen == 0) {
2848 if (slp->ns_cc < NFSX_UNSIGNED) {
2849 slp->ns_flag &= ~SLP_GETSTREAM;
2850 return (0);
2851 }
2852 m = slp->ns_raw;
2853 mdata = mbuf_data(m);
2854 mlen = mbuf_len(m);
2855 if (mlen >= NFSX_UNSIGNED) {
2856 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
2857 mdata += NFSX_UNSIGNED;
2858 mlen -= NFSX_UNSIGNED;
2859 mbuf_setdata(m, mdata, mlen);
2860 } else {
2861 cp1 = (caddr_t)&recmark;
2862 cp2 = mdata;
2863 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2864 while (mlen == 0) {
2865 m = mbuf_next(m);
2866 cp2 = mbuf_data(m);
2867 mlen = mbuf_len(m);
2868 }
2869 *cp1++ = *cp2++;
2870 mlen--;
2871 mbuf_setdata(m, cp2, mlen);
2872 }
2873 }
2874 slp->ns_cc -= NFSX_UNSIGNED;
2875 recmark = ntohl(recmark);
2876 slp->ns_reclen = recmark & ~0x80000000;
2877 if (recmark & 0x80000000)
2878 slp->ns_flag |= SLP_LASTFRAG;
2879 else
2880 slp->ns_flag &= ~SLP_LASTFRAG;
2881 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2882 slp->ns_flag &= ~SLP_GETSTREAM;
2883 return (EPERM);
2884 }
2885 }
2886
2887 /*
2888 * Now get the record part.
2889 *
2890 * Note that slp->ns_reclen may be 0. Linux sometimes
2891 * generates 0-length RPCs.
2892 */
2893 recm = NULL;
2894 if (slp->ns_cc == slp->ns_reclen) {
2895 recm = slp->ns_raw;
2896 slp->ns_raw = slp->ns_rawend = NULL;
2897 slp->ns_cc = slp->ns_reclen = 0;
2898 } else if (slp->ns_cc > slp->ns_reclen) {
2899 len = 0;
2900 m = slp->ns_raw;
2901 mlen = mbuf_len(m);
2902 mdata = mbuf_data(m);
2903 om = NULL;
2904 while (len < slp->ns_reclen) {
2905 if ((len + mlen) > slp->ns_reclen) {
2906 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
2907 slp->ns_flag &= ~SLP_GETSTREAM;
2908 return (EWOULDBLOCK);
2909 }
2910 if (om) {
2911 if (mbuf_setnext(om, m2)) {
2912 /* trouble... just drop it */
2913 printf("nfsrv_getstream: mbuf_setnext failed\n");
2914 mbuf_freem(m2);
2915 slp->ns_flag &= ~SLP_GETSTREAM;
2916 return (EWOULDBLOCK);
2917 }
2918 recm = slp->ns_raw;
2919 } else {
2920 recm = m2;
2921 }
2922 mdata += slp->ns_reclen - len;
2923 mlen -= slp->ns_reclen - len;
2924 mbuf_setdata(m, mdata, mlen);
2925 len = slp->ns_reclen;
2926 } else if ((len + mlen) == slp->ns_reclen) {
2927 om = m;
2928 len += mlen;
2929 m = mbuf_next(m);
2930 recm = slp->ns_raw;
2931 if (mbuf_setnext(om, NULL)) {
2932 printf("nfsrv_getstream: mbuf_setnext failed 2\n");
2933 slp->ns_flag &= ~SLP_GETSTREAM;
2934 return (EWOULDBLOCK);
2935 }
2936 mlen = mbuf_len(m);
2937 mdata = mbuf_data(m);
2938 } else {
2939 om = m;
2940 len += mlen;
2941 m = mbuf_next(m);
2942 mlen = mbuf_len(m);
2943 mdata = mbuf_data(m);
2944 }
2945 }
2946 slp->ns_raw = m;
2947 slp->ns_cc -= len;
2948 slp->ns_reclen = 0;
2949 } else {
2950 slp->ns_flag &= ~SLP_GETSTREAM;
2951 return (0);
2952 }
2953
2954 /*
2955 * Accumulate the fragments into a record.
2956 */
2957 if (slp->ns_frag == NULL) {
2958 slp->ns_frag = recm;
2959 } else {
2960 m = slp->ns_frag;
2961 while ((m2 = mbuf_next(m)))
2962 m = m2;
2963 if ((error = mbuf_setnext(m, recm)))
2964 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
2965 }
2966 if (slp->ns_flag & SLP_LASTFRAG) {
2967 if (slp->ns_recend)
2968 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
2969 else {
2970 slp->ns_rec = slp->ns_frag;
2971 slp->ns_flag |= SLP_DOREC;
2972 }
2973 slp->ns_recend = slp->ns_frag;
2974 slp->ns_frag = NULL;
2975 }
2976 }
2977 }
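/*
 * Editor's sketch (not part of the original source): nfsrv_getstream()
 * above implements RPC record marking (RFC 1831): each fragment on a
 * stream socket is preceded by a 4-byte big-endian word whose high bit
 * marks the record's last fragment and whose low 31 bits give the
 * fragment length.
 */
#if 0
#include <stdint.h>
#include <arpa/inet.h>

static void
example_decode_recmark(uint32_t wire, uint32_t *fraglen, int *lastfrag)
{
	uint32_t mark = ntohl(wire);

	*lastfrag = (mark & 0x80000000) != 0;	/* SLP_LASTFRAG equivalent */
	*fraglen = mark & ~0x80000000;		/* fragment length in bytes */
}
#endif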
2978
2979 /*
2980 * Parse an RPC header.
2981 */
2982 int
2983 nfsrv_dorec(
2984 struct nfsrv_sock *slp,
2985 struct nfsd *nfsd,
2986 struct nfsrv_descript **ndp)
2987 {
2988 mbuf_t m;
2989 mbuf_t nam;
2990 struct nfsrv_descript *nd;
2991 int error = 0;
2992
2993 *ndp = NULL;
2994 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
2995 return (ENOBUFS);
2996 MALLOC_ZONE(nd, struct nfsrv_descript *,
2997 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2998 if (!nd)
2999 return (ENOMEM);
3000 m = slp->ns_rec;
3001 slp->ns_rec = mbuf_nextpkt(m);
3002 if (slp->ns_rec)
3003 mbuf_setnextpkt(m, NULL);
3004 else {
3005 slp->ns_flag &= ~SLP_DOREC;
3006 slp->ns_recend = NULL;
3007 }
3008 slp->ns_reccnt--;
3009 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
3010 nam = m;
3011 m = mbuf_next(m);
3012 if ((error = mbuf_setnext(nam, NULL)))
3013 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
3014 } else
3015 nam = NULL;
3016 nd->nd_nam2 = nam;
3017 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
3018 if (!error)
3019 error = nfsrv_getreq(nd);
3020 if (error) {
3021 if (nam)
3022 mbuf_freem(nam);
3023 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
3024 return (error);
3025 }
3026 nd->nd_mrep = NULL;
3027 *ndp = nd;
3028 nfsd->nfsd_nd = nd;
3029 return (0);
3030 }
3031
3032 /*
3033 * Parse an RPC request
3034 * - verify it
3035 * - fill in the cred struct.
3036 */
3037 static int
3038 nfsrv_getreq(struct nfsrv_descript *nd)
3039 {
3040 struct nfsm_chain *nmreq;
3041 int len, i;
3042 u_long nfsvers, auth_type;
3043 int error = 0;
3044 uid_t user_id;
3045 gid_t group_id;
3046 int ngroups;
3047 struct ucred temp_cred;
3048 uint32_t val;
3049
3050 nd->nd_cr = NULL;
3051 nd->nd_gss_context = NULL;
3052 nd->nd_gss_seqnum = 0;
3053 nd->nd_gss_mb = NULL;
3054
3055 user_id = group_id = -2;
3056 val = auth_type = len = 0;
3057
3058 nmreq = &nd->nd_nmreq;
3059 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
3060 nfsm_chain_get_32(error, nmreq, val); // RPC Call
3061 if (!error && (val != RPC_CALL))
3062 error = EBADRPC;
3063 nfsmout_if(error);
3064 nd->nd_repstat = 0;
3065 nfsm_chain_get_32(error, nmreq, val); // RPC Version
3066 nfsmout_if(error);
3067 if (val != RPC_VER2) {
3068 nd->nd_repstat = ERPCMISMATCH;
3069 nd->nd_procnum = NFSPROC_NOOP;
3070 return (0);
3071 }
3072 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
3073 nfsmout_if(error);
3074 if (val != NFS_PROG) {
3075 nd->nd_repstat = EPROGUNAVAIL;
3076 nd->nd_procnum = NFSPROC_NOOP;
3077 return (0);
3078 }
3079 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
3080 nfsmout_if(error);
3081 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
3082 nd->nd_repstat = EPROGMISMATCH;
3083 nd->nd_procnum = NFSPROC_NOOP;
3084 return (0);
3085 }
3086 nd->nd_vers = nfsvers;
3087 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
3088 nfsmout_if(error);
3089 if ((nd->nd_procnum >= NFS_NPROCS) ||
3090 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
3091 nd->nd_repstat = EPROCUNAVAIL;
3092 nd->nd_procnum = NFSPROC_NOOP;
3093 return (0);
3094 }
3095 if (nfsvers != NFS_VER3)
3096 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
3097 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
3098 nfsm_chain_get_32(error, nmreq, len); // Auth Length
3099 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
3100 error = EBADRPC;
3101 nfsmout_if(error);
3102
3103 /* Handle authentication */
3104 if (auth_type == RPCAUTH_UNIX) {
3105 if (nd->nd_procnum == NFSPROC_NULL)
3106 return (0);
3107 nd->nd_sec = RPCAUTH_UNIX;
3108 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp
3109 nfsm_chain_get_32(error, nmreq, len); // hostname length
3110 if (len < 0 || len > NFS_MAXNAMLEN)
3111 error = EBADRPC;
3112 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname
3113 nfsmout_if(error);
3114
3115 /* create a temporary credential using the bits from the wire */
3116 bzero(&temp_cred, sizeof(temp_cred));
3117 nfsm_chain_get_32(error, nmreq, user_id);
3118 nfsm_chain_get_32(error, nmreq, group_id);
3119 temp_cred.cr_groups[0] = group_id;
3120 nfsm_chain_get_32(error, nmreq, len); // extra GID count
3121 if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
3122 error = EBADRPC;
3123 nfsmout_if(error);
3124 for (i = 1; i <= len; i++)
3125 if (i < NGROUPS)
3126 nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
3127 else
3128 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
3129 nfsmout_if(error);
3130 ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
3131 if (ngroups > 1)
3132 nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
3133 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE)
3134 nfsm_chain_get_32(error, nmreq, len); // verifier length
3135 if (len < 0 || len > RPCAUTH_MAXSIZ)
3136 error = EBADRPC;
3137 if (len > 0)
3138 nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
3139
3140 /* request creation of a real credential */
3141 temp_cred.cr_uid = user_id;
3142 temp_cred.cr_ngroups = ngroups;
3143 nd->nd_cr = kauth_cred_create(&temp_cred);
3144 if (nd->nd_cr == NULL) {
3145 nd->nd_repstat = ENOMEM;
3146 nd->nd_procnum = NFSPROC_NOOP;
3147 return (0);
3148 }
3149 } else if (auth_type == RPCSEC_GSS) {
3150 error = nfs_gss_svc_cred_get(nd, nmreq);
3151 if (error) {
3152 if (error == EINVAL)
3153 goto nfsmout; // drop the request
3154 nd->nd_repstat = error;
3155 nd->nd_procnum = NFSPROC_NOOP;
3156 return (0);
3157 }
3158 } else {
3159 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE
3160 return (0);
3161 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
3162 nd->nd_procnum = NFSPROC_NOOP;
3163 return (0);
3164 }
3165 return (0);
3166 nfsmout:
3167 if (IS_VALID_CRED(nd->nd_cr))
3168 kauth_cred_unref(&nd->nd_cr);
3169 nfsm_chain_cleanup(nmreq);
3170 return (error);
3171 }
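/*
 * Editor's sketch (not part of the original source): the AUTH_UNIX
 * credential body consumed above has this XDR layout (RFC 1831), all
 * fields big-endian 32-bit and the hostname padded to a 4-byte boundary:
 *
 *   stamp | hostname len | hostname... | uid | gid | ngids | gids...
 *
 * A stand-alone parser for the fixed words after the hostname;
 * 'example_parse_unix_cred' is hypothetical.
 */
#if 0
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

struct example_unix_cred {
	uint32_t uid, gid, ngids;
};

/* 'p' points just past the padded hostname; returns words consumed */
static size_t
example_parse_unix_cred(const uint32_t *p, struct example_unix_cred *c)
{
	c->uid = ntohl(p[0]);
	c->gid = ntohl(p[1]);
	c->ngids = ntohl(p[2]);		/* count of extra gids that follow */
	return (3 + c->ngids);
}
#endif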
3172
3173 /*
3174 * Search for a sleeping nfsd and wake it up.
3175 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
3176 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
3177 * Note: Must be called with nfsd_mutex held.
3178 */
3179 void
3180 nfsrv_wakenfsd(struct nfsrv_sock *slp)
3181 {
3182 struct nfsd *nd;
3183
3184 if ((slp->ns_flag & SLP_VALID) == 0)
3185 return;
3186
3187 lck_rw_lock_exclusive(&slp->ns_rwlock);
3188 /* if there's work to do on this socket, make sure it's queued up */
3189 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
3190 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
3191 slp->ns_flag |= SLP_WAITQ;
3192 }
3193 lck_rw_done(&slp->ns_rwlock);
3194
3195 /* wake up a waiting nfsd, if possible */
3196 nd = TAILQ_FIRST(&nfsd_queue);
3197 if (!nd)
3198 return;
3199
3200 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
3201 nd->nfsd_flag &= ~NFSD_WAITING;
3202 wakeup(nd);
3203 }
3204
3205 #endif /* NFSSERVER */
3206
3207 static int
3208 nfs_msg(thread_t thd,
3209 const char *server,
3210 const char *msg,
3211 int error)
3212 {
3213 proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
3214 tpr_t tpr;
3215
3216 if (p)
3217 tpr = tprintf_open(p);
3218 else
3219 tpr = NULL;
3220 if (error)
3221 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
3222 else
3223 tprintf(tpr, "nfs server %s: %s\n", server, msg);
3224 tprintf_close(tpr);
3225 return (0);
3226 }
3227
3228 void
3229 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
3230 {
3231 int ostate;
3232
3233 if (nmp == NULL)
3234 return;
3235
3236 lck_mtx_lock(&nmp->nm_lock);
3237 ostate = nmp->nm_state;
3238 if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
3239 nmp->nm_state |= NFSSTA_TIMEO;
3240 if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
3241 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3242 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
3243 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
3244 lck_mtx_unlock(&nmp->nm_lock);
3245
3246 if (!(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3247 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
3248
3249 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
3250 }
3251
3252 void
3253 nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
3254 {
3255 int ostate, state;
3256
3257 if (nmp == NULL)
3258 return;
3259
3260 if (msg)
3261 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
3262
3263 lck_mtx_lock(&nmp->nm_lock);
3264 ostate = nmp->nm_state;
3265 if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
3266 nmp->nm_state &= ~NFSSTA_TIMEO;
3267 if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
3268 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3269 if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
3270 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
3271 state = nmp->nm_state;
3272 lck_mtx_unlock(&nmp->nm_lock);
3273
3274 if ((ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
3275 !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)))
3276 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
3277 }
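/*
 * Editor's sketch (not part of the original source): nfs_down() and
 * nfs_up() signal VQ_NOTRESP only on edges: once when the first timeout
 * flag is raised, and once when the last one clears. A compact model, with
 * T_ANY standing in for the union of the NFSSTA_*TIMEO bits:
 */
#if 0
#define T_ANY	(0x1 | 0x2 | 0x4)	/* hypothetical timeout flag mask */

/* returns 1 when the "not responding" event should be posted */
static int
example_down(int *state, int flag)
{
	int ostate = *state;

	*state |= (flag & T_ANY);
	return (!(ostate & T_ANY));
}

/* returns 1 when the "responding again" event should be posted */
static int
example_up(int *state, int flag)
{
	int ostate = *state;

	*state &= ~(flag & T_ANY);
	return ((ostate & T_ANY) && !(*state & T_ANY));
}
#endif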
3278