/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/uio_internal.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128;	/* max # RPC records queued on (UDP) socket */

static int nfsrv_getstream(struct nfsrv_sock *, int);
static int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

#if NFSCLIENT

static int nfs_connect_setup(struct nfsmount *);
static void nfs_reqdequeue(struct nfsreq *);
static void nfs_udp_rcv(socket_t, void *, int);
static void nfs_tcp_rcv(socket_t, void *, int);
static void nfs_request_match_reply(struct nfsmount *, mbuf_t);
static void nfs_softterm(struct nfsreq *);

#ifdef NFS_SOCKET_DEBUGGING
#define NFS_SOCK_DBG(X)	printf X
#else
#define NFS_SOCK_DBG(X)
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that a timer estimate would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
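
/*
 * Illustrative arithmetic (sample values, not from the original source):
 * for a getattr (timer type 1) with nm_srtt[0] == 64 and nm_sdrtt[0] == 16,
 * NFS_RTO evaluates to ((((64 + 3) >> 2) + 16 + 1) >> 1) == 16 ticks; for a
 * read (timer type 3) with the same values it would be
 * (((64 + 7) >> 3) + 16 + 1) == 25 ticks, reflecting the more conservative
 * A+4D estimate described above.
 */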

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
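
/*
 * Illustrative example, assuming sample values: with NFS_CWNDSCALE 256 and
 * nm_cwnd at 4096 (i.e. 16 outstanding rpcs), a retransmit timeout halves
 * nm_cwnd to 2048.  Each subsequent reply then grows it by
 * (256*256 + 2048/2) / 2048 == 32, the scaled-integer form of the 1/cwnd
 * additive increase described above.
 */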

/*
 * Initialize socket state and perform setup for a new NFS connection.
 */
int
nfs_connect(struct nfsmount *nmp)
{
	socket_t so;
	int error, on = 1, proto;
	sock_upcall upcall;
	struct sockaddr *saddr;
	struct sockaddr_in sin;
	struct timeval timeo;
	u_short tport;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_CONNECTING;
	saddr = mbuf_data(nmp->nm_nam);
	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
	lck_mtx_unlock(&nmp->nm_lock);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
		nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
	if (error)
		goto bad;
	lck_mtx_lock(&nmp->nm_lock);
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		lck_mtx_unlock(&nmp->nm_lock);
		sin.sin_len = sizeof (struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin.sin_port = htons(tport);
		while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
		       (--tport > IPPORT_RESERVED / 2))
			sin.sin_port = htons(tport);
		if (error)
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a different address/port.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	} else {
		int tocnt = 0, optlen = sizeof(error);
		struct timespec ts = { 2, 0 };

		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && (error != EINPROGRESS))
			goto bad;
		lck_mtx_lock(&nmp->nm_lock);
		while (!sock_isconnected(so)) {
			if (tocnt++ == 15)	/* log a warning if connect is taking a while */
				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/* check for error on socket */
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) {
				log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
					error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				break;
			}
			if (tocnt > 60) {
				/* abort if this is taking too long */
				error = ENOTCONN;
				break;
			}
			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
				break;
			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
		}
		if (tocnt > 15)
			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
				error ? "aborted" : "completed",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		if (error) {
			lck_mtx_unlock(&nmp->nm_lock);
			goto bad;
		}
	}

	/*
	 * Set socket send/receive timeouts:
	 * - Receive timeout shouldn't matter because all receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		error = 0;
	}

	if (nmp->nm_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}

	if (nmp->nm_sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = 0;
		}
	}

	/* set SO_NOADDRERR to detect network changes ASAP */
	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	if (error) {
		lck_mtx_unlock(&nmp->nm_lock);
		goto bad;
	}
	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	if (!(nmp->nm_flag & NFSMNT_INT))
		sock_nointerrupt(so, 1);

	/* Initialize socket state variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	if (nmp->nm_sotype == SOCK_DGRAM) {
		/* XXX do we really want to reset this on each reconnect? */
		nmp->nm_cwnd = NFS_MAXCWND / 2;	/* Initial send window */
		nmp->nm_sent = 0;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		nmp->nm_fragleft = nmp->nm_reclen = 0;
		nmp->nm_timeouts = 0;
	}
	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
	nmp->nm_sockflags |= NMSOCK_SETUP;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
	lck_mtx_unlock(&nmp->nm_lock);
	error = nfs_connect_setup(nmp);
bad:
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
	if (!error) {
		nmp->nm_sockflags |= NMSOCK_READY;
		wakeup(&nmp->nm_sockflags);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		nfs_disconnect(nmp);
	return (error);
}

/* setup & confirm socket connection is functional */
static int
nfs_connect_setup(struct nfsmount *nmp)
{
	struct nfsm_chain nmreq, nmrep;
	int error = 0, status;
	u_int64_t xid;

	if (nmp->nm_vers >= NFS_VER4) {
		error = nfs4_setclientid(nmp);
	} else {
		/* verify connection's OK by sending a NULL request */
		nfsm_chain_null(&nmreq);
		nfsm_chain_null(&nmrep);
		nfsm_chain_build_alloc_init(error, &nmreq, 0);
		nfsm_chain_build_done(error, &nmreq);
		nfsmout_if(error);
		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
		if (!error)
			error = status;
nfsmout:
		nfsm_chain_cleanup(&nmreq);
		nfsm_chain_cleanup(&nmrep);
	}
	return (error);
}

/*
 * NFS socket reconnect routine:
 * Called when a connection is broken.
 * - disconnect the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 */
static int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, lastmsg, wentdown = 0;

	microuptime(&now);
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);

	while ((error = nfs_connect(nmp))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect.  Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	socket_t so;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
		mbuf_freem(nmp->nm_m);
		nmp->nm_m = nmp->nm_mlast = NULL;
	}
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
static void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
static void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish, force;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		force = (nmp->nm_state & NFSSTA_FORCE);
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
			if ((error = nfs_reconnect(nmp)))
				printf("nfs_reconnect failed %d for %s\n", error,
					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			else
				nmp->nm_reconnect_start = 0;
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (req->r_gss_ctx) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt));
				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
				if (!error)
					error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_rchain.tqe_next == NFSREQNOLIST)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
			if (!error)
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_rchain.tqe_next == NFSREQNOLIST)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			req->r_error = error;
			if (req->r_rchain.tqe_next == NFSREQNOLIST)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nmp->nm_sockflags & NMSOCK_READY) {
			ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		} else if (force)
			break;
	}

	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}

/* start or wake a mount's socket thread */
void
nfs_mount_sock_thread_wake(struct nfsmount *nmp)
{
	if (nmp->nm_sockthd)
		wakeup(&nmp->nm_sockthd);
	else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS)
		thread_deallocate(nmp->nm_sockthd);
}

/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - perform reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	socket_t so;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error)
		return (error);

	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error) {
		nfs_sndunlock(req);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) {
		/* a setup RPC but we're not in SETUP... must need reconnect */
		nfs_sndunlock(req);
		return (EPIPE);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (nmp->nm_flag & NFSMNT_INT)
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			lck_mtx_lock(&req->r_mtx);
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid));
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error)
			return (error);
		goto again;
	}
	so = nmp->nm_so;
	lck_mtx_unlock(&nmp->nm_lock);
	if (!so) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
		if ((sendnam = mbuf_data(nmp->nm_nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error));
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror));
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon) {	/* mark socket as needing reconnect */
		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
static void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	do {
		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
static void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct iovec_32 aio;
	struct msghdr msg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;
	int recv;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
		wakeup(&nmp->nm_so);
		return;
	}

	/* make sure we're on the current socket */
	if (nmp->nm_so != so)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
		/* upcall is already receiving data - just return */
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	nmp->nm_sockflags |= NMSOCK_UPCALL;

nextfrag:
	recv = 0;

	/* read the TCP RPC record marker */
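	/*
	 * Illustrative example: a record mark of 0x80000064 has the high
	 * bit set, so this is the last fragment of the record, and the
	 * low 31 bits give a fragment length of 0x64 (100) bytes;
	 * nm_fragleft becomes 100 and NMSOCK_LASTFRAG gets set below.
	 */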
	while (!error && nmp->nm_markerleft) {
		aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft +
			sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
		aio.iov_len = nmp->nm_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = (struct iovec *) &aio;
		msg.msg_iovlen = 1;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen)
			break;
		recv = 1;
		nmp->nm_markerleft -= rcvlen;
		if (nmp->nm_markerleft)
			continue;
		/* record marker complete */
		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
		if (nmp->nm_fragleft & 0x80000000) {
			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
			nmp->nm_fragleft &= ~0x80000000;
		}
		nmp->nm_reclen += nmp->nm_fragleft;
		if (nmp->nm_reclen > NFS_MAXPACKET) {
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			log(LOG_ERR, "%s (%d) from nfs server %s\n",
				"impossible RPC record length", nmp->nm_reclen,
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
		m = NULL;
		rcvlen = nmp->nm_fragleft;
		lck_mtx_unlock(&nmp->nm_lock);
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		lck_mtx_lock(&nmp->nm_lock);
		if (error || !rcvlen || !m)
			break;
		recv = 1;
		/* append mbufs to list */
		nmp->nm_fragleft -= rcvlen;
		if (!nmp->nm_m) {
			nmp->nm_m = m;
		} else {
			error = mbuf_setnext(nmp->nm_mlast, m);
			if (error) {
				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		while (mbuf_next(m))
			m = mbuf_next(m);
		nmp->nm_mlast = m;
	}

	/* done reading fragment? */
	m = NULL;
	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
		/* reset socket fragment parsing state */
		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
			/* RPC record complete */
			m = nmp->nm_m;
			/* reset socket record parsing state */
			nmp->nm_reclen = 0;
			nmp->nm_m = nmp->nm_mlast = NULL;
			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
		}
	}

	if (m) {	/* match completed response with request */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_request_match_reply(nmp, m);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* loop if we've been making error-free progress */
	if (!error && recv)
		goto nextfrag;

	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
	lck_mtx_unlock(&nmp->nm_lock);
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error));
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
static void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec_32 aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	aio.iov_base = (uintptr_t)&dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = (struct iovec *) &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
}

/*
 * Match an RPC reply with the corresponding request
 */
static void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_long reply = 0, rxid = 0;
	long t1;
	int error = 0, asyncioq, asyncgss;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply.
	 * If no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED)))))
			nfs_request_ref(req, 1);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		if (asyncgss) {
			nfs_gss_clnt_rpcdone(req);
			nfs_request_rele(req);
		}
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
static int
nfs_wait_reply(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	struct timespec ts = { 30, 0 };
	int error = 0, slpflag;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_gss_ctx) {
				/*
				 * It's an RPCSEC_GSS mount.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error));
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */
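
/*
 * Typical synchronous usage, as a sketch with assumed values (compare
 * nfs_connect_setup() above, which drives this same path through
 * nfs_request2()):
 *
 *	struct nfsreq *req = NULL;
 *	error = nfs_request_create(np, NULL, &nmreq, NFSPROC_GETATTR,
 *	    current_thread(), cred, &req);
 *	if (!error)
 *		error = nfs_request_add_header(req);
 *	if (!error)
 *		error = nfs_request_send(req, 1);
 *	if (!error) {
 *		nfs_request_wait(req);
 *		error = nfs_request_finish(req, &nmrep, &status);
 *	}
 *	nfs_request_rele(req);
 */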

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	/* don't fetch the mount until after the NULL check above */
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_lock(&req->r_mtx);
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (req->r_gss_ctx)
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0, auth_len = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	if (!req->r_cred) /* RPCAUTH_NULL */
		auth_len = 0;
	else switch (nmp->nm_auth) {
	case RPCAUTH_UNIX:
		if (req->r_cred->cr_ngroups < 1)
			return (EINVAL);
		auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		auth_len = 5 * NFSX_UNSIGNED + 0;	// zero context handle for now
		break;
	}
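
	/*
	 * Illustrative arithmetic, assuming NFSX_UNSIGNED is 4: an
	 * AUTH_UNIX cred with cr_ngroups == 5 (four supplementary groups,
	 * all within nm_numgrps) yields
	 * auth_len = (4 << 2) + 5 * NFSX_UNSIGNED == 36 bytes.
	 */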

	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_SOFT)
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}
55e303ae 1641
2d21ac55
A
1642/*
1643 * Finish up an NFS request by dequeueing it and
1644 * doing the initial NFS request reply processing.
1645 */
1646int
1647nfs_request_finish(
1648 struct nfsreq *req,
1649 struct nfsm_chain *nmrepp,
1650 int *status)
1651{
1652 struct nfsmount *nmp;
1653 mbuf_t mrep;
1654 int verf_type = 0;
1655 uint32_t verf_len = 0;
1656 uint32_t reply_status = 0;
1657 uint32_t rejected_status = 0;
1658 uint32_t auth_status = 0;
1659 uint32_t accepted_status = 0;
1660 struct nfsm_chain nmrep;
1661 int error, auth;
1c79356b 1662
2d21ac55 1663 error = req->r_error;
1c79356b 1664
2d21ac55
A
1665 if (nmrepp)
1666 nmrepp->nmc_mhead = NULL;
1c79356b 1667
2d21ac55
A
1668 /* RPC done, unlink the request. */
1669 nfs_reqdequeue(req);
1c79356b 1670
2d21ac55 1671 mrep = req->r_nmrep.nmc_mhead;
55e303ae 1672
2d21ac55 1673 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
1c79356b
A
1674
1675 /*
1676 * Decrement the outstanding request count.
1677 */
2d21ac55
A
1678 if (req->r_flags & R_CWND) {
1679 req->r_flags &= ~R_CWND;
1680 lck_mtx_lock(&nmp->nm_lock);
1681 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
1682 nmp->nm_sent -= NFS_CWNDSCALE;
1683 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
1684 /* congestion window is open, poke the cwnd queue */
1685 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
1686 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
1687 req2->r_cchain.tqe_next = NFSREQNOLIST;
1688 wakeup(req2);
55e303ae 1689 }
2d21ac55 1690 lck_mtx_unlock(&nmp->nm_lock);
1c79356b
A
1691 }
1692
2d21ac55
A
1693 if (req->r_gss_ctx) { // Using gss cred ?
1694 /*
1695 * If the request had an RPCSEC_GSS credential
1696 * then reset its sequence number bit in the
1697 * request window.
1698 */
1699 nfs_gss_clnt_rpcdone(req);
1700
1701 /*
1702 * If we need to re-send, go back and re-build the
1703 * request based on a new sequence number.
1704 * Note that we're using the original XID.
1705 */
1706 if (error == EAGAIN) {
1707 req->r_error = 0;
1708 if (mrep)
1709 mbuf_freem(mrep);
1710 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs
1711 req->r_nmrep.nmc_mhead = NULL;
1712 req->r_flags |= R_RESTART;
1713 if (error == ENEEDAUTH) {
1714 req->r_xid = 0; // get a new XID
1715 error = 0;
1716 }
1717 goto nfsmout;
1718 }
1c79356b
A
1719 }
1720
	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * Break down the RPC header and check that it's ok.
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;

			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0; // get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
	switch (auth) {
	case RPCAUTH_NULL:
	case RPCAUTH_UNIX:
		/* Any AUTH_UNIX verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

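	/*
	 * Dispatch on the RPC accepted status (RFC 1831): RPC_SUCCESS still
	 * carries an NFS-level status that must be decoded; the other accept
	 * values map directly onto errnos below.
	 */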
	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique in that
			 * it does not return an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) {
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)");
				req->r_flags |= R_JBTPRINTFMSG;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						return (error);
					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
				} while (--delay > 0);
			}
			req->r_xid = 0; // get a new XID
			req->r_flags |= R_RESTART;
			req->r_start = 0;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}
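		/*
		 * Added note: the jukebox delay above doubles from
		 * NFS_TRYLATERDEL up to a 30-second cap, so the retry spacing
		 * grows exponentially (e.g. 4, 8, 16, 30, 30, ... seconds,
		 * assuming NFS_TRYLATERDEL is 4).
		 */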

		if (req->r_flags & R_JBTPRINTFMSG)
			nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, "resource available again");

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np)
			cache_purge(NFSTOV(req->r_np));
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		error = 0;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (!error && (req->r_flags & R_JBTPRINTFMSG))
		nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL);
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}

/*
 * Perform an NFS request synchronously.
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,			/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		0, nmrepp, xidp, status);
}

int
nfs_request2(
	nfsnode_t np,
	mount_t mp,			/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}

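/*
 * Added note on the loop above: nfs_request_finish() sets R_RESTART
 * whenever the RPC must be rebuilt and resent (e.g. for jukebox
 * NFSERR_TRYLATER retries or RPCSEC_GSS context renewal), so the request
 * is re-headered and re-sent until it either completes or fails.
 */
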
/*
 * Create and start an asynchronous NFS request.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,			/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= R_ASYNC;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}
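
/*
 * Added note (a reading of nfs_request_async() above): callback-style
 * requests take an extra reference before the send so the request can't
 * be freed out from under the callback.  On return, the caller's
 * reference is dropped whenever there's an error or a callback is set,
 * and the callback's reference is dropped early if the send failed
 * before the request ever went out.
 */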

/*
 * Wait for and finish an asynchronous NFS request.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {	/* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {	/* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}

/*
 * Flag a request as being terminated.
 */
static void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}

/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
static void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 */
static void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}

/*
 * Unbusy the nfsreq passed in, and return the next nfsreq in the chain, busied.
 */
static struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}

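/*
 * Added note: nfs_reqbusy()/nfs_reqnext() give the timer hand-over-hand
 * traversal of nfs_reqq: the next entry is marked busy before the current
 * one is released, so a request can't be dequeued and freed while the
 * timer is still looking at it.
 */
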
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!(nmp->nm_flag & NFSMNT_SOFT))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & NMSOCK_POKE)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

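		/*
		 * Added note: NFS_RTO() derives the retransmit timeout from
		 * the mount's smoothed round-trip estimate for the request's
		 * timer class (proct[]), and nfs_backoff[] then scales it up
		 * with each successive timeout, i.e. an exponential backoff
		 * on top of an adaptive RTO.
		 */
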
		/* For soft mounts (& SETUPs), check for too many retransmits/timeout. */
		if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent. Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		if (!(nmp->nm_state & NFSSTA_MOUNTED))
			wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}
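
/*
 * Added note: sockets are poked from the queue above, after
 * nfs_request_mutex has been dropped, because nfs_sock_poke() may block
 * in the socket layer and should not stall the request-queue scan.
 */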

/*
 * Check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being
 * ignored and the mount is interruptible, or if we are a thread in
 * the process of cancellation (which also posts SIGKILL).
 */
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */

	/*
	 * If we're in the process of a force unmount and there's
	 * been a timeout, we're dead and fail IO.
	 */
	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_state & NFSSTA_FORCE) &&
	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		nmp->nm_flag |= NFSMNT_SOFT;
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/* If this thread belongs to the kernel task, then the abort check is not needed. */
	if ((current_proc() != kernproc) && current_thread_aborted())
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if ((nmp->nm_flag & NFSMNT_INT) &&
	    proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire RPC request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~NFSSTA_SNDLOCK;
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}

#endif /* NFSCLIENT */

#if NFSSERVER

/*
 * Generate the RPC reply header.
 * The siz arg is used to decide whether adding a cluster is worthwhile.
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_long *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;

	/*
	 * If this is a big reply, use a cluster; else
	 * try to leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
	return (0);
}

/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;	/* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
	    error != EWOULDBLOCK && error != EPIPE)
		error = 0;

	return (error);
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
{
	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;

	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		do {
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
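
/*
 * Background for nfsrv_getstream() below: RPC over TCP uses record
 * marking (RFC 1831): each fragment is preceded by a 4-byte big-endian
 * marker whose low 31 bits give the fragment length and whose high bit
 * (0x80000000) flags the last fragment of a record.  That is why the
 * code masks the marker with ~0x80000000 and tracks SLP_LASTFRAG.
 */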

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
static int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length RPCs.
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
		sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
static int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_long nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid);	// XID
	nfsm_chain_get_32(error, nmreq, val);		// RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val);		// RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val);		// RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);	// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);	// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
	    ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type);	// Auth Flavor
	nfsm_chain_get_32(error, nmreq, len);		// Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);
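
	/*
	 * At this point the RPC call header (RFC 1831) has been consumed:
	 * XID, message type (CALL), RPC version, program, program version,
	 * procedure, and the credential flavor and length.  What remains is
	 * the flavor-specific credential body and the verifier, handled
	 * below.
	 */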

	/* Handle authentication */
	if (auth_type == RPCAUTH_UNIX) {
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_UNIX;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_cred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
 */
void
nfsrv_wakenfsd(struct nfsrv_sock *slp)
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	/* if there's work to do on this socket, make sure it's queued up */
	if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) {
		TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq);
		slp->ns_flag |= SLP_WAITQ;
	}
	lck_rw_done(&slp->ns_rwlock);

	/* wake up a waiting nfsd, if possible */
	nd = TAILQ_FIRST(&nfsd_queue);
	if (!nd)
		return;

	TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue);
	nd->nfsd_flag &= ~NFSD_WAITING;
	wakeup(nd);
}

#endif /* NFSSERVER */

static int
nfs_msg(thread_t thd,
	const char *server,
	const char *msg,
	int error)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	if (error)
		tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error);
	else
		tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}

void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
{
	int ostate, do_vfs_signal;

	if (nmp == NULL)
		return;

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;
	lck_mtx_unlock(&nmp->nm_lock);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
		do_vfs_signal = 0;
	else
		do_vfs_signal = !(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}

void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int ostate, state, do_vfs_signal;

	if (nmp == NULL)
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);
	ostate = nmp->nm_state;
	if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;
	state = nmp->nm_state;
	lck_mtx_unlock(&nmp->nm_lock);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE))
		do_vfs_signal = 0;
	else
		do_vfs_signal = (ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) &&
		    !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO));
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}
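
/*
 * Added note: nfs_down() and nfs_up() signal VQ_NOTRESP only on an edge,
 * i.e. when the first of the timeout flags is set and when the last one
 * clears, so user space sees one "server down"/"server up" transition
 * rather than an event per timed-out request.
 */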