/*
 * (scraped gitweb page header removed)
 * apple/xnu.git — bsd/nfs/nfs_socket.c — tag xnu-792.10.96
 */
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1991, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
60 */
61
62 /*
63 * Socket operations for use by nfs
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/kauth.h>
70 #include <sys/mount_internal.h>
71 #include <sys/kernel.h>
72 #include <sys/kpi_mbuf.h>
73 #include <sys/malloc.h>
74 #include <sys/vnode.h>
75 #include <sys/domain.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/syslog.h>
79 #include <sys/tprintf.h>
80 #include <sys/uio_internal.h>
81 #include <libkern/OSAtomic.h>
82
83 #include <sys/time.h>
84 #include <kern/clock.h>
85 #include <kern/task.h>
86 #include <kern/thread.h>
87 #include <sys/user.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93 #include <nfs/nfsproto.h>
94 #include <nfs/nfs.h>
95 #include <nfs/xdr_subs.h>
96 #include <nfs/nfsm_subs.h>
97 #include <nfs/nfsmount.h>
98 #include <nfs/nfsnode.h>
99 #include <nfs/nfsrtt.h>
100
101 #include <sys/kdebug.h>
102
103 #define FSDBG(A, B, C, D, E) \
104 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
105 (int)(B), (int)(C), (int)(D), (int)(E), 0)
106 #define FSDBG_TOP(A, B, C, D, E) \
107 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
108 (int)(B), (int)(C), (int)(D), (int)(E), 0)
109 #define FSDBG_BOT(A, B, C, D, E) \
110 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
111 (int)(B), (int)(C), (int)(D), (int)(E), 0)
112
113 /*
114 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
115 * Use the mean and mean deviation of rtt for the appropriate type of rpc
116 * for the frequent rpcs and a default for the others.
117 * The justification for doing "other" this way is that these rpcs
118 * happen so infrequently that timer est. would probably be stale.
119 * Also, since many of these rpcs are
120 * non-idempotent, a conservative timeout is desired.
121 * getattr, lookup - A+2D
122 * read, write - A+4D
123 * other - nm_timeo
124 */
125 #define NFS_RTO(n, t) \
126 ((t) == 0 ? (n)->nm_timeo : \
127 ((t) < 3 ? \
128 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
129 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
130 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
131 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
132 /*
133 * External data, mostly RPC constants in XDR form
134 */
135 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
136 rpc_msgaccepted, rpc_call, rpc_autherr,
137 rpc_auth_kerb;
138 extern u_long nfs_prog;
139 extern struct nfsstats nfsstats;
140 extern int nfsv3_procid[NFS_NPROCS];
141 extern int nfs_ticks;
142 extern u_long nfs_xidwrap;
143
144 /*
145 * Defines which timer to use for the procnum.
146 * 0 - default
147 * 1 - getattr
148 * 2 - lookup
149 * 3 - read
150 * 4 - write
151 */
152 static int proct[NFS_NPROCS] = {
153 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
154 };
155
156 /*
157 * There is a congestion window for outstanding rpcs maintained per mount
158 * point. The cwnd size is adjusted in roughly the way that:
159 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
160 * SIGCOMM '88". ACM, August 1988.
161 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
162 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
163 * of rpcs is in progress.
164 * (The sent count and cwnd are scaled for integer arith.)
165 * Variants of "slow start" were tried and were found to be too much of a
166 * performance hit (ave. rtt 3 times larger),
167 * I suspect due to the large rtt that nfs rpcs have.
168 */
169 #define NFS_CWNDSCALE 256
170 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
171 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
172 int nfsrtton = 0;
173 struct nfsrtt nfsrtt;
174
175 static int nfs_rcvlock(struct nfsreq *);
176 static void nfs_rcvunlock(struct nfsreq *);
177 static int nfs_receive(struct nfsreq *rep, mbuf_t *mp);
178 static int nfs_reconnect(struct nfsreq *rep);
179 static void nfs_repdequeue(struct nfsreq *rep);
180
181 /* XXX */
182 boolean_t current_thread_aborted(void);
183 kern_return_t thread_terminate(thread_t);
184
185 #ifndef NFS_NOSERVER
186 static int nfsrv_getstream(struct nfssvc_sock *,int);
187
/*
 * Server-side dispatch table: one handler per NFS procedure number
 * (entry order must match the RPC procedure numbering; size is
 * NFS_NPROCS).  Unsupported procedures fall through to nfsrv_noop.
 */
int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
				struct nfssvc_sock *slp,
				proc_t procp,
				mbuf_t *mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop
};
216 #endif /* NFS_NOSERVER */
217
218
219 /*
220 * attempt to bind a socket to a reserved port
221 */
222 static int
223 nfs_bind_resv(struct nfsmount *nmp)
224 {
225 socket_t so = nmp->nm_so;
226 struct sockaddr_in sin;
227 int error;
228 u_short tport;
229
230 if (!so)
231 return (EINVAL);
232
233 sin.sin_len = sizeof (struct sockaddr_in);
234 sin.sin_family = AF_INET;
235 sin.sin_addr.s_addr = INADDR_ANY;
236 tport = IPPORT_RESERVED - 1;
237 sin.sin_port = htons(tport);
238
239 while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
240 (--tport > IPPORT_RESERVED / 2))
241 sin.sin_port = htons(tport);
242 return (error);
243 }
244
245 /*
246 * variables for managing the nfs_bind_resv_thread
247 */
int nfs_resv_mounts = 0;		/* count of mounts needing reserved-port binds */
static int nfs_bind_resv_thread_state = 0;	/* 0 = not set up yet */
#define NFS_BIND_RESV_THREAD_STATE_INITTED	1	/* locks/queue initialized */
#define NFS_BIND_RESV_THREAD_STATE_RUNNING	2	/* thread servicing requests */
lck_grp_t *nfs_bind_resv_lck_grp;
lck_grp_attr_t *nfs_bind_resv_lck_grp_attr;
lck_attr_t *nfs_bind_resv_lck_attr;
lck_mtx_t *nfs_bind_resv_mutex;		/* protects nfs_bind_resv_request_queue */
/* one queued bind request; lives on the requester's stack while queued */
struct nfs_bind_resv_request {
	TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;
	struct nfsmount *brr_nmp;	/* mount whose socket needs binding */
	int brr_error;			/* result from nfs_bind_resv() */
};
static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;
262
263 /*
264 * thread to handle any reserved port bind requests
265 */
266 static void
267 nfs_bind_resv_thread(void)
268 {
269 struct nfs_bind_resv_request *brreq;
270
271 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
272
273 while (nfs_resv_mounts > 0) {
274 lck_mtx_lock(nfs_bind_resv_mutex);
275 while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
276 TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
277 lck_mtx_unlock(nfs_bind_resv_mutex);
278 brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
279 wakeup(brreq);
280 lck_mtx_lock(nfs_bind_resv_mutex);
281 }
282 msleep((caddr_t)&nfs_bind_resv_request_queue,
283 nfs_bind_resv_mutex, PSOCK | PDROP,
284 "nfs_bind_resv_request_queue", 0);
285 }
286
287 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
288 (void) thread_terminate(current_thread());
289 }
290
291 int
292 nfs_bind_resv_thread_wake(void)
293 {
294 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING)
295 return (EIO);
296 wakeup(&nfs_bind_resv_request_queue);
297 return (0);
298 }
299
300 /*
301 * underprivileged procs call this to request nfs_bind_resv_thread
302 * to perform the reserved port binding for them.
303 */
304 static int
305 nfs_bind_resv_nopriv(struct nfsmount *nmp)
306 {
307 struct nfs_bind_resv_request brreq;
308 int error;
309
310 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
311 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
312 nfs_bind_resv_lck_grp_attr = lck_grp_attr_alloc_init();
313 nfs_bind_resv_lck_grp = lck_grp_alloc_init("nfs_bind_resv", nfs_bind_resv_lck_grp_attr);
314 nfs_bind_resv_lck_attr = lck_attr_alloc_init();
315 nfs_bind_resv_mutex = lck_mtx_alloc_init(nfs_bind_resv_lck_grp, nfs_bind_resv_lck_attr);
316 TAILQ_INIT(&nfs_bind_resv_request_queue);
317 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
318 }
319 kernel_thread(kernel_task, nfs_bind_resv_thread);
320 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
321 }
322
323 brreq.brr_nmp = nmp;
324 brreq.brr_error = 0;
325
326 lck_mtx_lock(nfs_bind_resv_mutex);
327 TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
328 lck_mtx_unlock(nfs_bind_resv_mutex);
329
330 error = nfs_bind_resv_thread_wake();
331 if (error) {
332 TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
333 /* Note: we might be able to simply restart the thread */
334 return (error);
335 }
336
337 tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);
338
339 return (brreq.brr_error);
340 }
341
342 /*
343 * Initialize sockets and congestion for a new NFS connection.
344 * We do not free the sockaddr if error.
345 */
346 int
347 nfs_connect(
348 struct nfsmount *nmp,
349 __unused struct nfsreq *rep)
350 {
351 socket_t so;
352 int error, rcvreserve, sndreserve;
353 struct sockaddr *saddr;
354 struct timeval timeo;
355
356 nmp->nm_so = 0;
357 saddr = mbuf_data(nmp->nm_nam);
358 error = sock_socket(saddr->sa_family, nmp->nm_sotype,
359 nmp->nm_soproto, 0, 0, &nmp->nm_so);
360 if (error) {
361 goto bad;
362 }
363 so = nmp->nm_so;
364
365 /*
366 * Some servers require that the client port be a reserved port number.
367 */
368 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
369 proc_t p;
370 /*
371 * sobind() requires current_proc() to have superuser privs.
372 * If this bind is part of a reconnect, and the current proc
373 * doesn't have superuser privs, we hand the sobind() off to
374 * a kernel thread to process.
375 */
376 if ((nmp->nm_state & NFSSTA_MOUNTED) &&
377 (p = current_proc()) && suser(kauth_cred_get(), 0)) {
378 /* request nfs_bind_resv_thread() to do bind */
379 error = nfs_bind_resv_nopriv(nmp);
380 } else {
381 error = nfs_bind_resv(nmp);
382 }
383 if (error)
384 goto bad;
385 }
386
387 /*
388 * Protocols that do not require connections may be optionally left
389 * unconnected for servers that reply from a port other than NFS_PORT.
390 */
391 if (nmp->nm_flag & NFSMNT_NOCONN) {
392 if (nmp->nm_sotype == SOCK_STREAM) {
393 error = ENOTCONN;
394 goto bad;
395 }
396 } else {
397 struct timeval tv;
398 tv.tv_sec = 2;
399 tv.tv_usec = 0;
400 error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
401 if (error && error != EINPROGRESS) {
402 goto bad;
403 }
404
405 while ((error = sock_connectwait(so, &tv)) == EINPROGRESS) {
406 if (rep && (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
407 goto bad;
408 }
409 }
410 }
411
412 /*
413 * Always time out on recieve, this allows us to reconnect the
414 * socket to deal with network changes.
415 */
416 timeo.tv_usec = 0;
417 timeo.tv_sec = 2;
418 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
419 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
420 timeo.tv_sec = 5;
421 } else {
422 timeo.tv_sec = 0;
423 }
424 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
425
426 if (nmp->nm_sotype == SOCK_DGRAM) {
427 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
428 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
429 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
430 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
431 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
432 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
433 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
434 } else {
435 int proto;
436 int on = 1;
437
438 sock_gettype(so, NULL, NULL, &proto);
439 if (nmp->nm_sotype != SOCK_STREAM)
440 panic("nfscon sotype");
441
442 // Assume that SOCK_STREAM always requires a connection
443 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
444
445 if (proto == IPPROTO_TCP) {
446 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
447 }
448
449 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
450 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
451 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
452 }
453
454 if (sndreserve > NFS_MAXSOCKBUF)
455 sndreserve = NFS_MAXSOCKBUF;
456 if (rcvreserve > NFS_MAXSOCKBUF)
457 rcvreserve = NFS_MAXSOCKBUF;
458 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sndreserve, sizeof(sndreserve));
459 if (error) {
460 goto bad;
461 }
462 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &rcvreserve, sizeof(rcvreserve));
463 if (error) {
464 goto bad;
465 }
466
467 sock_nointerrupt(so, 1);
468
469 /* Initialize other non-zero congestion variables */
470 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
471 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
472 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
473 nmp->nm_sdrtt[3] = 0;
474 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
475 nmp->nm_sent = 0;
476 FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
477 nmp->nm_timeouts = 0;
478 return (0);
479
480 bad:
481 nfs_disconnect(nmp);
482 return (error);
483 }
484
485 /*
486 * Reconnect routine:
487 * Called when a connection is broken on a reliable protocol.
488 * - clean up the old socket
489 * - nfs_connect() again
490 * - set R_MUSTRESEND for all outstanding requests on mount point
491 * If this fails the mount point is DEAD!
492 * nb: Must be called with the nfs_sndlock() set on the mount point.
493 */
494 static int
495 nfs_reconnect(struct nfsreq *rep)
496 {
497 struct nfsreq *rp;
498 struct nfsmount *nmp = rep->r_nmp;
499 int error;
500
501 nfs_disconnect(nmp);
502 while ((error = nfs_connect(nmp, rep))) {
503 if (error == EINTR || error == ERESTART)
504 return (EINTR);
505 if (error == EIO)
506 return (EIO);
507 nfs_down(rep->r_nmp, rep->r_procp, error, NFSSTA_TIMEO,
508 "can not connect");
509 rep->r_flags |= R_TPRINTFMSG;
510 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
511 /* we're not yet completely mounted and */
512 /* we can't reconnect, so we fail */
513 return (error);
514 }
515 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
516 return (error);
517 tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
518 }
519
520 /*
521 * Loop through outstanding request list and fix up all requests
522 * on old socket.
523 */
524 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
525 if (rp->r_nmp == nmp)
526 rp->r_flags |= R_MUSTRESEND;
527 }
528 return (0);
529 }
530
531 /*
532 * NFS disconnect. Clean up and unlink.
533 */
534 void
535 nfs_disconnect(struct nfsmount *nmp)
536 {
537 socket_t so;
538
539 if (nmp->nm_so) {
540 so = nmp->nm_so;
541 nmp->nm_so = 0;
542 sock_shutdown(so, 2);
543 sock_close(so);
544 }
545 }
546
547 /*
548 * This is the nfs send routine. For connection based socket types, it
549 * must be called with an nfs_sndlock() on the socket.
550 * "rep == NULL" indicates that it has been called from a server.
551 * For the client side:
552 * - return EINTR if the RPC is terminated, 0 otherwise
553 * - set R_MUSTRESEND if the send fails for any reason
554 * - do any cleanup required by recoverable socket errors (???)
555 * For the server side:
556 * - return EINTR or ERESTART if interrupted by a signal
557 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
558 * - do any cleanup required by recoverable socket errors (???)
559 */
560 int
561 nfs_send(so, nam, top, rep)
562 socket_t so;
563 mbuf_t nam;
564 mbuf_t top;
565 struct nfsreq *rep;
566 {
567 struct sockaddr *sendnam;
568 int error, error2, sotype, flags;
569 u_long xidqueued = 0;
570 struct nfsreq *rp;
571 char savenametolog[MAXPATHLEN];
572 struct msghdr msg;
573
574 if (rep) {
575 error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
576 if (error) {
577 mbuf_freem(top);
578 return (error);
579 }
580 if ((so = rep->r_nmp->nm_so) == NULL) {
581 rep->r_flags |= R_MUSTRESEND;
582 mbuf_freem(top);
583 return (0);
584 }
585 rep->r_flags &= ~R_MUSTRESEND;
586 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
587 if (rp == rep)
588 break;
589 if (rp)
590 xidqueued = rp->r_xid;
591 }
592 sock_gettype(so, NULL, &sotype, NULL);
593 if ((sotype == SOCK_STREAM) || (sock_isconnected(so)) ||
594 (nam == 0))
595 sendnam = (struct sockaddr *)0;
596 else
597 sendnam = mbuf_data(nam);
598
599 if (sotype == SOCK_SEQPACKET)
600 flags = MSG_EOR;
601 else
602 flags = 0;
603
604 /*
605 * Save the name here in case mount point goes away if we block.
606 * The name is using local stack and is large, but don't
607 * want to block if we malloc.
608 */
609 if (rep)
610 strncpy(savenametolog,
611 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname,
612 MAXPATHLEN - 1);
613 bzero(&msg, sizeof(msg));
614 msg.msg_name = (caddr_t)sendnam;
615 msg.msg_namelen = sendnam == 0 ? 0 : sendnam->sa_len;
616 error = sock_sendmbuf(so, &msg, top, flags, NULL);
617
618 if (error) {
619 if (rep) {
620 if (xidqueued) {
621 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
622 if (rp == rep && rp->r_xid == xidqueued)
623 break;
624 if (!rp)
625 panic("nfs_send: error %d xid %x gone",
626 error, xidqueued);
627 }
628 log(LOG_INFO, "nfs send error %d for server %s\n",
629 error, savenametolog);
630 /*
631 * Deal with errors for the client side.
632 */
633 error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
634 if (error2) {
635 error = error2;
636 } else {
637 rep->r_flags |= R_MUSTRESEND;
638 }
639 } else
640 log(LOG_INFO, "nfsd send error %d\n", error);
641
642 /*
643 * Handle any recoverable (soft) socket errors here. (???)
644 */
645 if (error != EINTR && error != ERESTART && error != EIO &&
646 error != EWOULDBLOCK && error != EPIPE) {
647 error = 0;
648 }
649 }
650 return (error);
651 }
652
653 /*
654 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
655 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
656 * Mark and consolidate the data into a new mbuf list.
657 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
658 * small mbufs.
659 * For SOCK_STREAM we must be very careful to read an entire record once
660 * we have read any of it, even if the system call has been interrupted.
661 */
662 static int
663 nfs_receive(struct nfsreq *rep, mbuf_t *mp)
664 {
665 socket_t so;
666 struct iovec_32 aio;
667 mbuf_t m, mlast;
668 u_long len, fraglen;
669 int error, error2, sotype;
670 proc_t p = current_proc(); /* XXX */
671 struct msghdr msg;
672 size_t rcvlen;
673 int lastfragment;
674
675 /*
676 * Set up arguments for soreceive()
677 */
678 *mp = NULL;
679 sotype = rep->r_nmp->nm_sotype;
680
681 /*
682 * For reliable protocols, lock against other senders/receivers
683 * in case a reconnect is necessary.
684 * For SOCK_STREAM, first get the Record Mark to find out how much
685 * more there is to get.
686 * We must lock the socket against other receivers
687 * until we have an entire rpc request/reply.
688 */
689 if (sotype != SOCK_DGRAM) {
690 error = nfs_sndlock(rep);
691 if (error)
692 return (error);
693 tryagain:
694 /*
695 * Check for fatal errors and resending request.
696 */
697 /*
698 * Ugh: If a reconnect attempt just happened, nm_so
699 * would have changed. NULL indicates a failed
700 * attempt that has essentially shut down this
701 * mount point.
702 */
703 if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
704 nfs_sndunlock(rep);
705 if (error)
706 return (error);
707 return (EINTR);
708 }
709 so = rep->r_nmp->nm_so;
710 if (!so) {
711 error = nfs_reconnect(rep);
712 if (error) {
713 nfs_sndunlock(rep);
714 return (error);
715 }
716 goto tryagain;
717 }
718 while (rep->r_flags & R_MUSTRESEND) {
719 error = mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m);
720 if (!error) {
721 OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
722 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
723 }
724 /*
725 * we also hold rcv lock so rep is still
726 * legit this point
727 */
728 if (error) {
729 if (error == EINTR || error == ERESTART ||
730 (error = nfs_reconnect(rep))) {
731 nfs_sndunlock(rep);
732 return (error);
733 }
734 goto tryagain;
735 }
736 }
737 nfs_sndunlock(rep);
738 if (sotype == SOCK_STREAM) {
739 error = 0;
740 len = 0;
741 lastfragment = 0;
742 mlast = NULL;
743 while (!error && !lastfragment) {
744 aio.iov_base = (uintptr_t) &fraglen;
745 aio.iov_len = sizeof(u_long);
746 bzero(&msg, sizeof(msg));
747 msg.msg_iov = (struct iovec *) &aio;
748 msg.msg_iovlen = 1;
749 do {
750 error = sock_receive(so, &msg, MSG_WAITALL, &rcvlen);
751 if (!rep->r_nmp) /* if unmounted then bailout */
752 goto shutout;
753 if (error == EWOULDBLOCK && rep) {
754 error2 = nfs_sigintr(rep->r_nmp, rep, p);
755 if (error2)
756 error = error2;
757 }
758 } while (error == EWOULDBLOCK);
759 if (!error && rcvlen < aio.iov_len) {
760 /* only log a message if we got a partial word */
761 if (rcvlen != 0)
762 log(LOG_INFO,
763 "short receive (%d/%d) from nfs server %s\n",
764 rcvlen, sizeof(u_long),
765 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
766 error = EPIPE;
767 }
768 if (error)
769 goto errout;
770 lastfragment = ntohl(fraglen) & 0x80000000;
771 fraglen = ntohl(fraglen) & ~0x80000000;
772 len += fraglen;
773 /*
774 * This is SERIOUS! We are out of sync with the sender
775 * and forcing a disconnect/reconnect is all I can do.
776 */
777 if (len > NFS_MAXPACKET) {
778 log(LOG_ERR, "%s (%d) from nfs server %s\n",
779 "impossible RPC record length", len,
780 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
781 error = EFBIG;
782 goto errout;
783 }
784
785 m = NULL;
786 do {
787 rcvlen = fraglen;
788 error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &rcvlen);
789 if (!rep->r_nmp) /* if unmounted then bailout */ {
790 goto shutout;
791 }
792 } while (error == EWOULDBLOCK || error == EINTR ||
793 error == ERESTART);
794
795 if (!error && fraglen > rcvlen) {
796 log(LOG_INFO,
797 "short receive (%d/%d) from nfs server %s\n",
798 rcvlen, fraglen,
799 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
800 error = EPIPE;
801 mbuf_freem(m);
802 }
803 if (!error) {
804 if (!*mp) {
805 *mp = m;
806 mlast = m;
807 } else {
808 error = mbuf_setnext(mlast, m);
809 if (error) {
810 printf("nfs_receive: mbuf_setnext failed %d\n", error);
811 mbuf_freem(m);
812 }
813 }
814 while (mbuf_next(mlast))
815 mlast = mbuf_next(mlast);
816 }
817 }
818 } else {
819 bzero(&msg, sizeof(msg));
820 do {
821 rcvlen = 100000000;
822 error = sock_receivembuf(so, &msg, mp, 0, &rcvlen);
823 if (!rep->r_nmp) /* if unmounted then bailout */ {
824 goto shutout;
825 }
826 if (error == EWOULDBLOCK && rep) {
827 error2 = nfs_sigintr(rep->r_nmp, rep, p);
828 if (error2) {
829 return (error2);
830 }
831 }
832 } while (error == EWOULDBLOCK);
833
834 if ((msg.msg_flags & MSG_EOR) == 0)
835 printf("Egad!!\n");
836 if (!error && *mp == NULL)
837 error = EPIPE;
838 len = rcvlen;
839 }
840 errout:
841 if (error && error != EINTR && error != ERESTART) {
842 mbuf_freem(*mp);
843 *mp = NULL;
844 if (error != EPIPE)
845 log(LOG_INFO,
846 "receive error %d from nfs server %s\n", error,
847 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
848 error = nfs_sndlock(rep);
849 if (!error) {
850 error = nfs_reconnect(rep);
851 if (!error)
852 goto tryagain;
853 nfs_sndunlock(rep);
854 }
855 }
856 } else {
857 /*
858 * We could have failed while rebinding the datagram socket
859 * so we need to attempt to rebind here.
860 */
861 if ((so = rep->r_nmp->nm_so) == NULL) {
862 error = nfs_sndlock(rep);
863 if (!error) {
864 error = nfs_reconnect(rep);
865 nfs_sndunlock(rep);
866 }
867 if (error)
868 return (error);
869 if (!rep->r_nmp) /* if unmounted then bailout */
870 return (ENXIO);
871 so = rep->r_nmp->nm_so;
872 }
873 bzero(&msg, sizeof(msg));
874 len = 0;
875 do {
876 rcvlen = 1000000;
877 error = sock_receivembuf(so, &msg, mp, 0, &rcvlen);
878 if (!rep->r_nmp) /* if unmounted then bailout */
879 goto shutout;
880 if (error) {
881 error2 = nfs_sigintr(rep->r_nmp, rep, p);
882 if (error2) {
883 error = error2;
884 goto shutout;
885 }
886 }
887 /* Reconnect for all errors. We may be receiving
888 * soft/hard/blocking errors because of a network
889 * change.
890 * XXX: we should rate limit or delay this
891 * to once every N attempts or something.
892 * although TCP doesn't seem to.
893 */
894 if (error) {
895 error2 = nfs_sndlock(rep);
896 if (!error2) {
897 error2 = nfs_reconnect(rep);
898 if (error2)
899 error = error2;
900 else if (!rep->r_nmp) /* if unmounted then bailout */
901 error = ENXIO;
902 else
903 so = rep->r_nmp->nm_so;
904 nfs_sndunlock(rep);
905 } else {
906 error = error2;
907 }
908 }
909 } while (error == EWOULDBLOCK);
910 }
911 shutout:
912 if (error) {
913 mbuf_freem(*mp);
914 *mp = NULL;
915 }
916 return (error);
917 }
918
919 /*
920 * Implement receipt of reply on a socket.
921 * We must search through the list of received datagrams matching them
922 * with outstanding requests using the xid, until ours is found.
923 */
924 /* ARGSUSED */
925 int
926 nfs_reply(myrep)
927 struct nfsreq *myrep;
928 {
929 struct nfsreq *rep;
930 struct nfsmount *nmp = myrep->r_nmp;
931 long t1;
932 mbuf_t mrep, md;
933 u_long rxid, *tl;
934 caddr_t dpos, cp2;
935 int error;
936
937 /*
938 * Loop around until we get our own reply
939 */
940 for (;;) {
941 /*
942 * Lock against other receivers so that I don't get stuck in
943 * sbwait() after someone else has received my reply for me.
944 * Also necessary for connection based protocols to avoid
945 * race conditions during a reconnect.
946 * If nfs_rcvlock() returns EALREADY, that means that
947 * the reply has already been recieved by another
948 * process and we can return immediately. In this
949 * case, the lock is not taken to avoid races with
950 * other processes.
951 */
952 error = nfs_rcvlock(myrep);
953 if (error == EALREADY)
954 return (0);
955 if (error)
956 return (error);
957
958 /*
959 * If we slept after putting bits otw, then reply may have
960 * arrived. In which case returning is required, or we
961 * would hang trying to nfs_receive an already received reply.
962 */
963 if (myrep->r_mrep != NULL) {
964 nfs_rcvunlock(myrep);
965 FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
966 return (0);
967 }
968 /*
969 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
970 * is still intact by checks done in nfs_rcvlock.
971 */
972 error = nfs_receive(myrep, &mrep);
973 /*
974 * Bailout asap if nfsmount struct gone (unmounted).
975 */
976 if (!myrep->r_nmp) {
977 FSDBG(530, myrep->r_xid, myrep, nmp, -2);
978 if (mrep)
979 mbuf_freem(mrep);
980 return (ENXIO);
981 }
982 if (error) {
983 FSDBG(530, myrep->r_xid, myrep, nmp, error);
984 nfs_rcvunlock(myrep);
985
986 /* Bailout asap if nfsmount struct gone (unmounted). */
987 if (!myrep->r_nmp) {
988 if (mrep)
989 mbuf_freem(mrep);
990 return (ENXIO);
991 }
992
993 /*
994 * Ignore routing errors on connectionless protocols??
995 */
996 if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) {
997 if (nmp->nm_so) {
998 int clearerror;
999 int optlen = sizeof(clearerror);
1000 sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
1001 }
1002 continue;
1003 }
1004 if (mrep)
1005 mbuf_freem(mrep);
1006 return (error);
1007 }
1008
1009 /*
1010 * We assume all is fine, but if we did not have an error
1011 * and mrep is 0, better not dereference it. nfs_receive
1012 * calls soreceive which carefully sets error=0 when it got
1013 * errors on sbwait (tsleep). In most cases, I assume that's
1014 * so we could go back again. In tcp case, EPIPE is returned.
1015 * In udp, case nfs_receive gets back here with no error and no
1016 * mrep. Is the right fix to have soreceive check for process
1017 * aborted after sbwait and return something non-zero? Should
1018 * nfs_receive give an EPIPE? Too risky to play with those
1019 * two this late in game for a shutdown problem. Instead,
1020 * just check here and get out. (ekn)
1021 */
1022 if (!mrep) {
1023 nfs_rcvunlock(myrep);
1024 FSDBG(530, myrep->r_xid, myrep, nmp, -3);
1025 return (ENXIO); /* sounds good */
1026 }
1027
1028 /*
1029 * Get the xid and check that it is an rpc reply
1030 */
1031 md = mrep;
1032 dpos = mbuf_data(md);
1033 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
1034 rxid = *tl++;
1035 if (*tl != rpc_reply) {
1036 OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
1037 mbuf_freem(mrep);
1038 nfsmout:
1039 if (nmp->nm_state & NFSSTA_RCVLOCK)
1040 nfs_rcvunlock(myrep);
1041 continue;
1042 }
1043
1044 /*
1045 * Loop through the request list to match up the reply
1046 * Iff no match, just drop the datagram
1047 */
1048 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1049 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1050 /* Found it.. */
1051 rep->r_mrep = mrep;
1052 rep->r_md = md;
1053 rep->r_dpos = dpos;
1054 /*
1055 * If we're tracking the round trip time
1056 * then we update the circular log here
1057 * with the stats from our current request.
1058 */
1059 if (nfsrtton) {
1060 struct rttl *rt;
1061
1062 rt = &nfsrtt.rttl[nfsrtt.pos];
1063 rt->proc = rep->r_procnum;
1064 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
1065 rt->sent = nmp->nm_sent;
1066 rt->cwnd = nmp->nm_cwnd;
1067 if (proct[rep->r_procnum] == 0)
1068 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1069 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
1070 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
1071 rt->fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
1072 microtime(&rt->tstamp); // XXX unused
1073 if (rep->r_flags & R_TIMING)
1074 rt->rtt = rep->r_rtt;
1075 else
1076 rt->rtt = 1000000;
1077 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
1078 }
1079 /*
1080 * Update congestion window.
1081 * Do the additive increase of
1082 * one rpc/rtt.
1083 */
1084 FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
1085 nmp->nm_cwnd);
1086 if (nmp->nm_cwnd <= nmp->nm_sent) {
1087 nmp->nm_cwnd +=
1088 (NFS_CWNDSCALE * NFS_CWNDSCALE +
1089 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
1090 if (nmp->nm_cwnd > NFS_MAXCWND)
1091 nmp->nm_cwnd = NFS_MAXCWND;
1092 }
1093 if (rep->r_flags & R_SENT) {
1094 rep->r_flags &= ~R_SENT;
1095 nmp->nm_sent -= NFS_CWNDSCALE;
1096 }
1097 /*
1098 * Update rtt using a gain of 0.125 on the mean
1099 * and a gain of 0.25 on the deviation.
1100 */
1101 if (rep->r_flags & R_TIMING) {
1102 /*
1103 * Since the timer resolution of
1104 * NFS_HZ is so course, it can often
1105 * result in r_rtt == 0. Since
1106 * r_rtt == N means that the actual
1107 * rtt is between N+dt and N+2-dt ticks,
1108 * add 1.
1109 */
1110 if (proct[rep->r_procnum] == 0)
1111 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1112 t1 = rep->r_rtt + 1;
1113 t1 -= (NFS_SRTT(rep) >> 3);
1114 NFS_SRTT(rep) += t1;
1115 if (t1 < 0)
1116 t1 = -t1;
1117 t1 -= (NFS_SDRTT(rep) >> 2);
1118 NFS_SDRTT(rep) += t1;
1119 }
1120 nmp->nm_timeouts = 0;
1121 break;
1122 }
1123 }
1124 nfs_rcvunlock(myrep);
1125 /*
1126 * If not matched to a request, drop it.
1127 * If it's mine, get out.
1128 */
1129 if (rep == 0) {
1130 OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
1131 mbuf_freem(mrep);
1132 } else if (rep == myrep) {
1133 if (rep->r_mrep == NULL)
1134 panic("nfs_reply: nil r_mrep");
1135 return (0);
1136 }
1137 FSDBG(530, myrep->r_xid, myrep, rep,
1138 rep ? rep->r_xid : myrep->r_flags);
1139 }
1140 }
1141
1142 /*
1143 * nfs_request - goes something like this
1144 * - fill in request struct
1145 * - links it into list
1146 * - calls nfs_send() for first transmit
1147 * - calls nfs_receive() to get reply
1148 * - break down rpc header and return with nfs reply pointed to
1149 * by mrep or error
1150 * nb: always frees up mreq mbuf list
1151 */
int
nfs_request(vp, mp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
	vnode_t vp;		/* vnode the RPC is for (may be NULL) */
	mount_t mp;		/* mount point, used when vp is NULL */
	mbuf_t mrest;		/* request body; always consumed */
	int procnum;		/* NFS procedure number */
	proc_t procp;		/* calling process, for signal checks */
	kauth_cred_t cred;	/* credentials to build the RPC auth with */
	mbuf_t *mrp;		/* OUT: reply mbuf chain */
	mbuf_t *mdp;		/* OUT: current mbuf in reply for dissection */
	caddr_t *dposp;		/* OUT: current position within *mdp */
	u_int64_t *xidp;	/* OUT: 64-bit xid (with wrap count), optional */
{
	mbuf_t m, mrep, m2;
	struct nfsreq re, *rep;
	u_long *tl;
	int i;
	struct nfsmount *nmp;
	mbuf_t md, mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	caddr_t dpos, cp2;
	int t1, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_long xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	int nmsotype;
	struct timeval now;

	/* Initialize the outputs so the caller sees sane values on error. */
	if (mrp)
		*mrp = NULL;
	if (xidp)
		*xidp = 0;
	nmp = VFSTONFS(mp);

	/* The request structure lives on our stack for the whole call. */
	rep = &re;

	if (vp)
		nmp = VFSTONFS(vnode_mount(vp));
	/* Bail out if the mount is gone, or is force-unmounting while timed out. */
	if (nmp == NULL ||
	    (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		mbuf_freem(mrest);
		return (ENXIO);
	}
	nmsotype = nmp->nm_sotype;

	FSDBG_TOP(531, vp, procnum, nmp, rep);

	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	microuptime(&now);
	/* Schedule the first "not responding" message after the initial delay. */
	rep->r_lastmsg = now.tv_sec -
	    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	/* Total up the length of the caller's request mbuf chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += mbuf_len(m);
		m = mbuf_next(m);
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	/* Revalidate the mount; it may have gone away while we blocked. */
	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
	if (!nmp) {
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		mbuf_freem(mrest);
		return (ENXIO);
	}
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		bzero((caddr_t)key, sizeof (key));
		/*
		 * Try the cached nickname auth first; fall back to a full
		 * Kerberos exchange if it fails or has previously failed.
		 */
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
			if (!nmp) {
				FSDBG_BOT(531, 2, vp, error, rep);
				mbuf_freem(mrest);
				return (ENXIO);
			}
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
			if (!error && !nmp)
				error = ENXIO;
			if (error) {
				FSDBG_BOT(531, 2, vp, error, rep);
				mbuf_freem(mrest);
				return (error);
			}
		}
	} else {
		/* AUTH_UNIX: uid + gid list, clipped to the mount's group limit. */
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	/* Prepend the RPC header (also yields the xid) to the request body. */
	error = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid, &m);
	if (auth_str)
		_FREE(auth_str, M_TEMP);
	if (error) {
		mbuf_freem(mrest);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}
	if (xidp)
		*xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmsotype == SOCK_STREAM) {
		error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
		if (error) {
			mbuf_freem(m);
			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
			return (error);
		}
		/* High bit marks "last fragment"; low 31 bits are the length. */
		*((u_long*)mbuf_data(m)) =
			htonl(0x80000000 | (mbuf_pkthdr_len(m) - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
	if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	/* Only time procedures that have an RTT class assigned in proct[]. */
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		nmp->nm_sent < nmp->nm_cwnd)) {
		int connrequired = (nmp->nm_sotype == SOCK_STREAM);

		if (connrequired)
			error = nfs_sndlock(rep);

		/*
		 * Set the R_SENT before doing the send in case another thread
		 * processes the reply before the nfs_send returns here
		 */
		if (!error) {
			if ((rep->r_flags & R_MUSTRESEND) == 0) {
				FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
				      nmp->nm_cwnd);
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}

			/* Send a copy so the original is kept for retransmits. */
			error = mbuf_copym(m, 0, MBUF_COPYALL, MBUF_WAITOK, &m2);
			if (!error)
				error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
			if (connrequired)
				nfs_sndunlock(rep);
		}
		nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
		if (error) {
			/* Send failed: back out the congestion accounting. */
			if (nmp)
				nmp->nm_sent -= NFS_CWNDSCALE;
			rep->r_flags &= ~R_SENT;
		}
	} else {
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	nfs_repdequeue(rep);

	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		if (nmp) {
			FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
			nmp->nm_sent -= NFS_CWNDSCALE;
		}
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error)
		nfs_up(nmp, procp, NFSSTA_TIMEO,
		    (rep->r_flags & R_TPRINTFMSG) ? "is alive again" : NULL);
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (!error && !nmp)
		error = ENXIO;
	if (error) {
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				/*
				 * Nickname auth was rejected; retry once with
				 * a full Kerberos handshake (see kerbauth:).
				 */
				failed_auth++;
				error = mbuf_setnext(mheadend, NULL);
				mbuf_freem(mrep);
				mbuf_freem(rep->r_mreq);
				if (!error)
					goto kerbauth;
				printf("nfs_request: mbuf_setnext failed\n");
			} else
				error = EAUTH;
		} else
			error = EACCES;
		mbuf_freem(mrep);
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
			    error == NFSERR_TRYLATER) {
				/*
				 * Server asked us to try again later: back
				 * off with a doubling delay (capped at 60s)
				 * and resend the whole request.
				 */
				mbuf_freem(mrep);
				error = 0;
				microuptime(&now);
				waituntil = now.tv_sec + trylater_delay;
				while (now.tv_sec < waituntil) {
					tsleep((caddr_t)&lbolt, PSOCK, "nfstrylater", 0);
					microuptime(&now);
				}
				trylater_delay *= 2;
				if (trylater_delay > 60)
					trylater_delay = 60;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if ((error == ESTALE) && vp)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				/* V3: hand the reply back anyway so the caller
				 * can still parse post-op attributes. */
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else {
				mbuf_freem(mrep);
				error &= ~NFSERR_RETERR;
			}
			mbuf_freem(rep->r_mreq);
			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
			return (error);
		}

		/* Success: return the reply chain and parse position. */
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
		return (0);
	}
	mbuf_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	mbuf_freem(rep->r_mreq);
	FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
	return (error);
}
1487
1488 #ifndef NFS_NOSERVER
1489 /*
1490 * Generate the rpc reply header
1491 * siz arg. is used to decide if adding a cluster is worthwhile
1492 */
int
nfs_rephead(siz, nd, slp, err, mrq, mbp, bposp)
	int siz;			/* expected reply payload size */
	struct nfsrv_descript *nd;	/* request descriptor (xid, flags, cred) */
	struct nfssvc_sock *slp;	/* server socket (for kerb uid lookup) */
	int err;			/* NFS/RPC error to encode, 0 if ok */
	mbuf_t *mrq;			/* OUT: head of reply chain, optional */
	mbuf_t *mbp;			/* OUT: current mbuf for further appends */
	caddr_t *bposp;			/* OUT: current build position in *mbp */
{
	u_long *tl;
	mbuf_t mreq;
	caddr_t bpos;
	mbuf_t mb, mb2;
	int error, mlen;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mreq);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX nfsstat? */
		return (error);
	}
	mb = mreq;
	tl = mbuf_data(mreq);
	mlen = 6 * NFSX_UNSIGNED;
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mreq, tl, mlen);
	} else {
		mbuf_setlen(mreq, mlen);
	}
	/* bpos points just past the 6 words we're about to fill. */
	bpos = ((caddr_t)tl) + mlen;
	*tl++ = txdr_unsigned(nd->nd_retxid);	/* echo the request's xid */
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		/* MSG_DENIED: authentication failure or RPC version mismatch. */
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			/* The auth-error form is one word shorter. */
			mlen -= NFSX_UNSIGNED;
			mbuf_setlen(mreq, mlen);
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;
			uid_t uid = kauth_cred_getuid(nd->nd_cr);

			/* Look up the cached per-uid Kerberos state. */
			lck_rw_lock_shared(&slp->ns_rwlock);
			for (nuidp = NUIDHASH(slp, uid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (kauth_cred_getuid(nuidp->nu_cr) == uid &&
				    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
				     &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				/*
				 * Encrypt the timestamp in ecb mode using the
				 * session key.
				 * NOTE(review): with NFSKERB off nothing fills
				 * in ktvout, so the words below come from an
				 * uninitialized struct — confirm intended.
				 */
#if NFSKERB
				XXX
#endif

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				/* nfsm_build resets tl to freshly added space. */
				nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(kauth_cred_getuid(nuidp->nu_cr));
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
			lck_rw_done(&slp->ns_rwlock);
		} else {
			/* AUTH_NULL verifier: flavor 0, length 0. */
			*tl++ = 0;
			*tl++ = 0;
		}
		/* Encode the accept status word. */
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
			// XXX hard coded versions
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(3);
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	if (mrq != NULL)
		*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID) {
		OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
	}
	return (0);
}
1638
1639
1640 #endif /* NFS_NOSERVER */
1641
1642
1643 /*
1644 * From FreeBSD 1.58, a Matt Dillon fix...
1645 * Flag a request as being about to terminate.
1646 * The nm_sent count is decremented now to avoid deadlocks when the process
1647 * in soreceive() hasn't yet managed to send its own request.
1648 */
1649 static void
1650 nfs_softterm(struct nfsreq *rep)
1651 {
1652
1653 rep->r_flags |= R_SOFTTERM;
1654 if (rep->r_flags & R_SENT) {
1655 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1656 rep->r_nmp->nm_cwnd);
1657 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1658 rep->r_flags &= ~R_SENT;
1659 }
1660 }
1661
1662 void
1663 nfs_timer_funnel(void * arg)
1664 {
1665 (void) thread_funnel_set(kernel_flock, TRUE);
1666 nfs_timer(arg);
1667 (void) thread_funnel_set(kernel_flock, FALSE);
1668
1669 }
1670
1671 /*
1672 * Ensure rep isn't in use by the timer, then dequeue it.
1673 */
1674 static void
1675 nfs_repdequeue(struct nfsreq *rep)
1676 {
1677
1678 while ((rep->r_flags & R_BUSY)) {
1679 rep->r_flags |= R_WAITING;
1680 tsleep(rep, PSOCK, "repdeq", 0);
1681 }
1682 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1683 }
1684
1685 /*
1686 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
1687 * free()'d out from under it.
1688 */
1689 static void
1690 nfs_repbusy(struct nfsreq *rep)
1691 {
1692
1693 if ((rep->r_flags & R_BUSY))
1694 panic("rep locked");
1695 rep->r_flags |= R_BUSY;
1696 }
1697
1698 /*
1699 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
1700 */
1701 static struct nfsreq *
1702 nfs_repnext(struct nfsreq *rep)
1703 {
1704 struct nfsreq * nextrep;
1705
1706 if (rep == NULL)
1707 return (NULL);
1708 /*
1709 * We need to get and busy the next req before signalling the
1710 * current one, otherwise wakeup() may block us and we'll race to
1711 * grab the next req.
1712 */
1713 nextrep = TAILQ_NEXT(rep, r_chain);
1714 if (nextrep != NULL)
1715 nfs_repbusy(nextrep);
1716 /* unbusy and signal. */
1717 rep->r_flags &= ~R_BUSY;
1718 if ((rep->r_flags & R_WAITING)) {
1719 rep->r_flags &= ~R_WAITING;
1720 wakeup(rep);
1721 }
1722 return (nextrep);
1723 }
1724
1725 /*
1726 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
1728 * To avoid retransmission attempts on STREAM sockets (in the future) make
1729 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1730 */
void
nfs_timer(__unused void *arg)
{
	struct nfsreq *rep;
	mbuf_t m;
	socket_t so;
	struct nfsmount *nmp;
	int timeo;
	int error;
#ifndef NFS_NOSERVER
	struct nfssvc_sock *slp;
	u_quad_t cur_usec;
#endif /* NFS_NOSERVER */
	int flags, rexmit, cwnd, sent;
	u_long xid;
	struct timeval now;

	/* Walk the outstanding-request list, busying each entry in turn. */
	rep = TAILQ_FIRST(&nfs_reqq);
	if (rep != NULL)
		nfs_repbusy(rep);
	microuptime(&now);
	for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
		nmp = rep->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		/* Skip requests that are already answered or terminating. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp))
			continue;
		/* Emit a throttled "not responding" message for slow servers. */
		if (nmp->nm_tprintf_initial_delay != 0 &&
		    (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
		    rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
			rep->r_lastmsg = now.tv_sec;
			nfs_down(rep->r_nmp, rep->r_procp, 0, NFSSTA_TIMEO,
				"not responding");
			rep->r_flags |= R_TPRINTFMSG;
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(rep);
				continue;
			}
		}
		if (rep->r_rtt >= 0) {
			/* Actively timed: bump the tick count and compare
			 * against the (backed-off) retransmit timeout. */
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			/* ensure 62.5 ms floor */
			while (16 * timeo < hz)
				timeo *= 2;
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			/* clamp the backoff index; presumably matches the
			 * size of nfs_backoff[] — verify */
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for too many retransmits. This is never true for
		 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
		 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
		 */
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
			nfs_softterm(rep);
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			/* Stream sockets: no retransmit here; just clip and
			 * keep the count for the tprintf logic above. */
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_DONTWAIT, &m) == 0)){
			struct msghdr msg;
			/*
			 * Iff first send, start timing
			 * else turn timing off, backoff timer
			 * and divide congestion window by 2.
			 * We update these *before* the send to avoid
			 * racing against receiving the reply.
			 * We save them so we can restore them on send error.
			 */
			flags = rep->r_flags;
			rexmit = rep->r_rexmit;
			cwnd = nmp->nm_cwnd;
			sent = nmp->nm_sent;
			xid = rep->r_xid;
			if (rep->r_flags & R_SENT) {
				rep->r_flags &= ~R_TIMING;
				if (++rep->r_rexmit > NFS_MAXREXMIT)
					rep->r_rexmit = NFS_MAXREXMIT;
				nmp->nm_cwnd >>= 1;
				if (nmp->nm_cwnd < NFS_CWNDSCALE)
					nmp->nm_cwnd = NFS_CWNDSCALE;
				OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
			} else {
				rep->r_flags |= R_SENT;
				nmp->nm_sent += NFS_CWNDSCALE;
			}
			FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);

			bzero(&msg, sizeof(msg));
			/* Unconnected UDP needs an explicit destination. */
			if ((nmp->nm_flag & NFSMNT_NOCONN) == NFSMNT_NOCONN) {
				msg.msg_name = mbuf_data(nmp->nm_nam);
				msg.msg_namelen = mbuf_len(nmp->nm_nam);
			}
			error = sock_sendmbuf(so, &msg, m, MSG_DONTWAIT, NULL);

			FSDBG(535, xid, error, sent, cwnd);

			if (error) {
				if (error == EWOULDBLOCK) {
					/* Socket buffer full: roll everything
					 * back and let the next tick retry. */
					rep->r_flags = flags;
					rep->r_rexmit = rexmit;
					nmp->nm_cwnd = cwnd;
					nmp->nm_sent = sent;
					rep->r_xid = xid;
				}
				else {
					/* Other send error: clear an ignorable
					 * error off the socket, restore the
					 * saved state, flag the resend error. */
					if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) {
						int clearerror;
						int optlen = sizeof(clearerror);
						sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
					}
					rep->r_flags = flags | R_RESENDERR;
					rep->r_rexmit = rexmit;
					nmp->nm_cwnd = cwnd;
					nmp->nm_sent = sent;
					if (flags & R_SENT)
						OSAddAtomic(-1, (SInt32*)&nfsstats.rpcretries);
				}
			} else
				rep->r_rtt = 0;
		}
	}
	microuptime(&now);
#ifndef NFS_NOSERVER
	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
	lck_mtx_lock(nfsd_mutex);
	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
		if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec))
			nfsrv_wakenfsd(slp);
	}
	/* Reap server sockets that have been dead for at least 5 seconds. */
	while ((slp = TAILQ_FIRST(&nfssvc_deadsockhead))) {
		if ((slp->ns_timestamp + 5) > now.tv_sec)
			break;
		TAILQ_REMOVE(&nfssvc_deadsockhead, slp, ns_chain);
		nfsrv_slpfree(slp);
	}
	lck_mtx_unlock(nfsd_mutex);
#endif /* NFS_NOSERVER */

	if (nfsbuffreeuptimestamp + 30 <= now.tv_sec) {
		/*
		 * We haven't called nfs_buf_freeup() in a little while.
		 * So, see if we can free up any stale/unused bufs now.
		 */
		nfs_buf_freeup(1);
	}

	/* Re-arm ourselves for the next tick. */
	timeout(nfs_timer_funnel, (void *)0, nfs_ticks);

}
1913
1914
1915 /*
1916 * Test for a termination condition pending on the process.
1917 * This is used to determine if we need to bail on a mount.
1918 * EIO is returned if there has been a soft timeout.
1919 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptible, or if we are a thread that is in the process
1921 * of cancellation (also SIGKILL posted).
1922 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;	/* mount to check; NULL means already gone */
	struct nfsreq *rep;	/* in-flight request, or NULL */
	proc_t p;		/* process to check signals on, or NULL */
{
	sigset_t pending_sigs;
	int context_good = 0;
	struct nfsmount *repnmp;
	extern proc_t kernproc;

	if (nmp == NULL)
		return (ENXIO);
	if (rep != NULL) {
		repnmp = rep->r_nmp;
		/* we've had a forced unmount. */
		if (repnmp == NULL)
			return (ENXIO);
		/* request has timed out on a 'soft' mount. */
		if (rep->r_flags & R_SOFTTERM)
			return (EIO);
		/*
		 * We're in the progress of a force unmount and there's
		 * been a timeout we're dead and fail IO.
		 */
		if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
		    (NFSSTA_FORCE|NFSSTA_TIMEO))
			return (EIO);
		/* Someone is unmounting us, go soft and mark it. */
		if (repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
			/* NOTE(review): flag set on repnmp but state set on
			 * nmp — these are normally the same mount; confirm. */
			repnmp->nm_flag |= NFSMNT_SOFT;
			nmp->nm_state |= NFSSTA_FORCE;
		}
		/*
		 * If the mount is hung and we've requested not to hang
		 * on remote filesystems, then bail now.
		 */
		if (p != NULL && (proc_noremotehang(p)) != 0 &&
		    (repnmp->nm_state & NFSSTA_TIMEO) != 0)
			return (EIO);
	}
	/* XXX: is this valid? this probably should be an assertion. */
	if (p == NULL)
		return (0);

	/* If this thread belongs to the kernel task, the abort check is
	 * not needed. */
	if ((current_proc() != kernproc) && current_thread_aborted()) {
		return (EINTR);
	}
	/* mask off thread and process blocked signals. */

	pending_sigs = proc_pendingsignals(p, NFSINT_SIGMASK);
	if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
		return (EINTR);
	return (0);
}
1979
1980 /*
1981 * Lock a socket against others.
1982 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1983 * and also to avoid race conditions between the processes with nfs requests
1984 * in progress when a reconnect is necessary.
1985 */
int
nfs_sndlock(rep)
	struct nfsreq *rep;	/* request wanting the send lock */
{
	int *statep;
	proc_t p;
	int error, slpflag = 0, slptimeo = 0;

	/* A forced unmount may already have cleared the mount pointer. */
	if (rep->r_nmp == NULL)
		return (ENXIO);
	statep = &rep->r_nmp->nm_state;

	p = rep->r_procp;
	/* Interruptible mounts let a signal break us out of the wait. */
	if (rep->r_nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		error = nfs_sigintr(rep->r_nmp, rep, p);
		if (error)
			return (error);
		*statep |= NFSSTA_WANTSND;
		/* Poll (1s timeout) if the process asked not to hang on
		 * remote filesystems. */
		if (p != NULL && (proc_noremotehang(p)) != 0)
			slptimeo = hz;
		tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsndlck", slptimeo);
		if (slpflag == PCATCH) {
			/* Only catch signals on the first sleep; afterwards
			 * poll every 2 seconds instead. */
			slpflag = 0;
			slptimeo = 2 * hz;
		}
		/*
		 * Make sure while we slept that the mountpoint didn't go away.
		 * nfs_sigintr and callers expect it intact.
		 */
		if (!rep->r_nmp)
			return (ENXIO); /* don't have lock until out of loop */
	}
	*statep |= NFSSTA_SNDLOCK;
	return (0);
}
2023
2024 /*
2025 * Unlock the stream socket for others.
2026 */
2027 void
2028 nfs_sndunlock(rep)
2029 struct nfsreq *rep;
2030 {
2031 int *statep;
2032
2033 if (rep->r_nmp == NULL)
2034 return;
2035 statep = &rep->r_nmp->nm_state;
2036 if ((*statep & NFSSTA_SNDLOCK) == 0)
2037 panic("nfs sndunlock");
2038 *statep &= ~NFSSTA_SNDLOCK;
2039 if (*statep & NFSSTA_WANTSND) {
2040 *statep &= ~NFSSTA_WANTSND;
2041 wakeup((caddr_t)statep);
2042 }
2043 }
2044
/*
 * Lock the receive side of the socket for the given request so that only
 * one thread at a time parses incoming replies.
 * Returns EALREADY if the reply already arrived, ENXIO if the mount is
 * gone, or an nfs_sigintr() error; 0 means the lock is held.
 */
static int
nfs_rcvlock(struct nfsreq *rep)
{
	int *statep;
	int error, slpflag, slptimeo = 0;

	/* make sure we still have our mountpoint */
	if (!rep->r_nmp) {
		if (rep->r_mrep != NULL)
			return (EALREADY);
		return (ENXIO);
	}

	statep = &rep->r_nmp->nm_state;
	FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
	/* Interruptible mounts let a signal break us out of the wait. */
	if (rep->r_nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	while (*statep & NFSSTA_RCVLOCK) {
		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
			return (error);
		} else if (rep->r_mrep != NULL) {
			/*
			 * Don't bother sleeping if reply already arrived
			 */
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
			return (EALREADY);
		}
		FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
		*statep |= NFSSTA_WANTRCV;
		/*
		 * We need to poll if we're P_NOREMOTEHANG so that we
		 * call nfs_sigintr periodically above.
		 */
		if (rep->r_procp != NULL &&
		    (proc_noremotehang(rep->r_procp)) != 0)
			slptimeo = hz;
		tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo);
		if (slpflag == PCATCH) {
			/* Only catch signals on the first sleep; afterwards
			 * poll every 2 seconds instead. */
			slpflag = 0;
			slptimeo = 2 * hz;
		}
		/*
		 * Make sure while we slept that the mountpoint didn't go away.
		 * nfs_sigintr and caller nfs_reply expect it intact.
		 */
		if (!rep->r_nmp) {
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
			return (ENXIO); /* don't have lock until out of loop */
		}
	}
	/*
	 * nfs_reply will handle it if reply already arrived.
	 * (We may have slept or been preempted).
	 */
	FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
	*statep |= NFSSTA_RCVLOCK;
	return (0);
}
2106
2107 /*
2108 * Unlock the stream socket for others.
2109 */
2110 static void
2111 nfs_rcvunlock(struct nfsreq *rep)
2112 {
2113 int *statep;
2114
2115 if (rep->r_nmp == NULL)
2116 return;
2117 statep = &rep->r_nmp->nm_state;
2118
2119 FSDBG(533, statep, *statep, 0, 0);
2120 if ((*statep & NFSSTA_RCVLOCK) == 0)
2121 panic("nfs rcvunlock");
2122 *statep &= ~NFSSTA_RCVLOCK;
2123 if (*statep & NFSSTA_WANTRCV) {
2124 *statep &= ~NFSSTA_WANTRCV;
2125 wakeup((caddr_t)statep);
2126 }
2127 }
2128
2129
2130 #ifndef NFS_NOSERVER
2131 /*
2132 * Socket upcall routine for the nfsd sockets.
2133 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2134 * Essentially do as much as possible non-blocking, else punt and it will
2135 * be called with MBUF_WAITOK from an nfsd.
2136 */
2137 void
2138 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2139 {
2140 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2141
2142 if (!nfs_numnfsd || !(slp->ns_flag & SLP_VALID))
2143 return;
2144
2145 lck_rw_lock_exclusive(&slp->ns_rwlock);
2146 nfsrv_rcv_locked(so, slp, waitflag);
2147 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2148 }
/*
 * Pull data off an nfsd socket (stream or datagram), queue up complete
 * request records, and wake an nfsd if anything is ready.
 * Entered with slp->ns_rwlock held exclusive; when waitflag is
 * MBUF_DONTWAIT the lock is dropped before returning (see dorecs:).
 */
void
nfsrv_rcv_locked(socket_t so, struct nfssvc_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = SLP_NEEDQ;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		/* Append the newly received data to the raw stream buffer. */
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		/* Walk to the new tail of the chain. */
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		/* Datagram socket: each receive is one complete record. */
		struct sockaddr_storage	nam;

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		do {
			bytes_read = 1000000000;
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				/* Prepend the sender's address so the reply
				 * can be routed back to it. */
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				/* Queue the record for an nfsd to process. */
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
			}
#if 0
			if (error) {
				/*
				 * This may be needed in the future to support
				 * non-byte-stream connection-oriented protocols
				 * such as SCTP.
				 */
				/*
				 * This (slp->ns_sotype == SOCK_STREAM) should really
				 * be a check for PR_CONNREQUIRED.
				 */
				if ((slp->ns_sotype == SOCK_STREAM)
					&& error != EWOULDBLOCK) {
					ns_flag = SLP_DISCONN;
					goto dorecs;
				}
			}
#endif
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)));
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfs_numnfsd) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
2286
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Consumes bytes from slp->ns_raw/ns_cc.  Each record is framed by a
 * 4-byte RPC record mark: high bit = last fragment, low 31 bits =
 * fragment length.  Fragments accumulate on slp->ns_frag until the
 * last one arrives, then the whole record is queued on ns_rec/ns_recend.
 *
 * Returns 0 on success/no-more-data, EPERM for a bad record mark
 * (caller disconnects), or EWOULDBLOCK when an mbuf operation could
 * not complete without sleeping.
 */
static int
nfsrv_getstream(slp, waitflag)
	struct nfssvc_sock *slp;
	int waitflag;
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	/* SLP_GETSTREAM acts as a reentrancy guard for this socket. */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/* Need a full 4-byte record mark before proceeding. */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				/* Fast path: mark is contiguous in one mbuf. */
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/* Slow path: gather the mark byte by byte. */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			/* Reject absurd fragment lengths; caller disconnects. */
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* Exact fit: take the whole raw chain as the fragment. */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* Fragment ends mid-chain: split the raw chain. */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/*
					 * Boundary falls inside this mbuf:
					 * copy the head portion out and trim
					 * the original in place.
					 */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* Boundary at end of this mbuf: detach here. */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* Whole mbuf belongs to the fragment; keep walking. */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* Not enough data yet for this fragment; wait for more. */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* Record complete: move it to the request queue. */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else
				slp->ns_rec = slp->ns_frag;
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
2436
/*
 * Dequeue the next request record from a server socket and parse its
 * RPC header (via nfs_getreq), building an nfsrv_descript for an nfsd.
 *
 * Returns 0 with *ndp set on success, ENOBUFS if the socket is invalid
 * or has no queued records, ENOMEM on allocation failure, or the error
 * from nfs_getreq (which frees the record's mbufs on failure).
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	/* Unlink the first record from the ns_rec packet queue. */
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else
		slp->ns_recend = NULL;
	/* Datagram records carry the sender address in a leading SONAME mbuf. */
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mbuf_data(m);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		/* nfs_getreq freed the request mbufs; free the rest here. */
		if (nam)
			mbuf_freem(nam);
		FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
2485
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * Dissects the RPC call header out of nd->nd_mrep using the nfsm_*
 * macros (which jump to "nfsmout" and free mrep on malformed input),
 * validating program/version/procedure, then handles AUTH_UNIX or
 * Kerberos credentials.  On protocol-level rejection the reply status
 * is set in nd->nd_repstat and 0 is returned so a reply can be sent;
 * a non-zero return means the request was dropped entirely.
 */
int
nfs_getreq(nd, nfsd, has_header)
	struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	int len, i;
	u_long *tl;
	long t1;
	uio_t uiop;
	caddr_t dpos, cp2, cp;
	u_long nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, ticklen;
	mbuf_t mrep, md;
	struct nfsuid *nuidp;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	struct timeval tvin, tvout, now;
	char uio_buf[ UIO_SIZEOF(1) ];
#if 0 /* until encrypted keys are implemented */
	NFSKERBKEYSCHED_T keys; /* stores key schedule */
#endif

	nd->nd_cr = NULL;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		/* xid + "call" direction precede the common 8-word header. */
		nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_long, *tl++);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	/* Reject out-of-range procedures (v2 has fewer than v3). */
	if ((nd->nd_procnum >= NFS_NPROCS) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Map v2 procedure numbers onto the internal v3 numbering. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		mbuf_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* Skip the machine-name string (length-prefixed). */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
		user_id = fxdr_unsigned(uid_t, *tl++);
		group_id = fxdr_unsigned(gid_t, *tl++);
		temp_cred.cr_groups[0] = group_id;
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		/* Supplementary gids; excess beyond NGROUPS is discarded. */
		nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				temp_cred.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrvw_sort(&temp_cred.cr_groups[0], ngroups);
		/* Verifier length: skip its opaque body below if non-zero. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			*((u_long *)nfsd->nfsd_authstr) = *tl;
			uiop = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
				&uio_buf[0], sizeof(uio_buf));
			if (!uiop) {
				nd->nd_repstat = ENOMEM;
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			// LP64todo - fix this
			nfsd->nfsd_authlen = (nfsm_rndup(ticklen) + (NFSX_UNSIGNED * 2));
			if ((nfsm_rndup(ticklen) + NFSX_UNSIGNED) > (len - 2 * NFSX_UNSIGNED)) {
				mbuf_freem(mrep);
				return (EBADRPC);
			}
			/* Copy the ticket into nfsd_authstr for the auth daemon. */
			uio_addiov(uiop, CAST_USER_ADDR_T(&nfsd->nfsd_authstr[4]), RPCAUTH_MAXSIZ - 4);
			// LP64todo - fix this
			nfsm_mtouio(uiop, uio_resid(uiop));
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_long *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			/* Defer to the user-space auth daemon for verification. */
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			/* Look up a cached credential for this nickname uid. */
			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
				nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (kauth_cred_getuid(nuidp->nu_cr) == nickuid &&
					(!nd->nd_nam2 ||
					 netaddr_match(NU_NETFAM(nuidp),
						&nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#if NFSKERB
			XXX
#endif

			/*
			 * NOTE(review): with NFSKERB disabled the decryption
			 * stub above never fills in tvout, so tvout is read
			 * uninitialized here — confirm this path is only
			 * reachable when NFSKERB is enabled.
			 */
			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			microtime(&now);
			if (nuidp->nu_expire < now.tv_sec ||
				nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
				(nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
				 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			/* Build the request credential from the cached one. */
			bzero(&temp_cred, sizeof(temp_cred));
			ngroups = nuidp->nu_cr->cr_ngroups;
			for (i = 0; i < ngroups; i++)
				temp_cred.cr_groups[i] = nuidp->nu_cr->cr_groups[i];
			if (ngroups > 1)
				nfsrvw_sort(&temp_cred.cr_groups[0], ngroups);

			temp_cred.cr_uid = kauth_cred_getuid(nuidp->nu_cr);
			temp_cred.cr_ngroups = ngroups;
			nd->nd_cr = kauth_cred_create(&temp_cred);
			if (!nd->nd_cr) {
				nd->nd_repstat = ENOMEM;
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nd->nd_flag |= ND_KERBNICK;
		};
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	/* nfsm_* macros jump here after freeing mrep on malformed input. */
	if (nd->nd_cr)
		kauth_cred_rele(nd->nd_cr);
	return (error);
}
2746
2747 /*
2748 * Search for a sleeping nfsd and wake it up.
2749 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2750 * running nfsds will go look for the work in the nfssvc_sock list.
2751 * Note: Must be called with nfsd_mutex held.
2752 */
2753 void
2754 nfsrv_wakenfsd(struct nfssvc_sock *slp)
2755 {
2756 struct nfsd *nd;
2757
2758 if ((slp->ns_flag & SLP_VALID) == 0)
2759 return;
2760
2761 lck_rw_lock_exclusive(&slp->ns_rwlock);
2762
2763 if (nfsd_waiting) {
2764 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
2765 if (nd->nfsd_flag & NFSD_WAITING) {
2766 nd->nfsd_flag &= ~NFSD_WAITING;
2767 if (nd->nfsd_slp)
2768 panic("nfsd wakeup");
2769 slp->ns_sref++;
2770 nd->nfsd_slp = slp;
2771 lck_rw_done(&slp->ns_rwlock);
2772 wakeup((caddr_t)nd);
2773 return;
2774 }
2775 }
2776 }
2777
2778 slp->ns_flag |= SLP_DOREC;
2779
2780 lck_rw_done(&slp->ns_rwlock);
2781
2782 nfsd_head_flag |= NFSD_CHECKSLP;
2783 }
2784 #endif /* NFS_NOSERVER */
2785
2786 static int
2787 nfs_msg(proc_t p,
2788 const char *server,
2789 const char *msg,
2790 int error)
2791 {
2792 tpr_t tpr;
2793
2794 if (p)
2795 tpr = tprintf_open(p);
2796 else
2797 tpr = NULL;
2798 if (error)
2799 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
2800 error);
2801 else
2802 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2803 tprintf_close(tpr);
2804 return (0);
2805 }
2806
2807 void
2808 nfs_down(nmp, proc, error, flags, msg)
2809 struct nfsmount *nmp;
2810 proc_t proc;
2811 int error, flags;
2812 const char *msg;
2813 {
2814 if (nmp == NULL)
2815 return;
2816 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
2817 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
2818 nmp->nm_state |= NFSSTA_TIMEO;
2819 }
2820 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
2821 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 0);
2822 nmp->nm_state |= NFSSTA_LOCKTIMEO;
2823 }
2824 nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
2825 }
2826
2827 void
2828 nfs_up(nmp, proc, flags, msg)
2829 struct nfsmount *nmp;
2830 proc_t proc;
2831 int flags;
2832 const char *msg;
2833 {
2834 if (nmp == NULL)
2835 return;
2836 if (msg)
2837 nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
2838 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
2839 nmp->nm_state &= ~NFSSTA_TIMEO;
2840 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
2841 }
2842 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
2843 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
2844 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 1);
2845 }
2846 }
2847