/*
 * Provenance (gitweb extraction residue):
 * git.saurik.com — apple/xnu.git (xnu-792.6.61), bsd/nfs/nfs_socket.c
 */
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1991, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
60 */
61
62 /*
63 * Socket operations for use by nfs
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/kauth.h>
70 #include <sys/mount_internal.h>
71 #include <sys/kernel.h>
72 #include <sys/kpi_mbuf.h>
73 #include <sys/malloc.h>
74 #include <sys/vnode.h>
75 #include <sys/domain.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/syslog.h>
79 #include <sys/tprintf.h>
80 #include <sys/uio_internal.h>
81 #include <libkern/OSAtomic.h>
82
83 #include <sys/time.h>
84 #include <kern/clock.h>
85 #include <kern/task.h>
86 #include <kern/thread.h>
87 #include <sys/user.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93 #include <nfs/nfsproto.h>
94 #include <nfs/nfs.h>
95 #include <nfs/xdr_subs.h>
96 #include <nfs/nfsm_subs.h>
97 #include <nfs/nfsmount.h>
98 #include <nfs/nfsnode.h>
99 #include <nfs/nfsrtt.h>
100
101 #include <sys/kdebug.h>
102
/*
 * File-system kdebug tracing shorthands: emit a KERNEL_DEBUG event in the
 * DBG_FSRW class carrying up to four integer arguments.  FSDBG_TOP/FSDBG_BOT
 * mark the start/end of an operation for paired trace analysis.
 */
#define FSDBG(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)
#define FSDBG_TOP(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)
#define FSDBG_BOT(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)

/*
 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
/* smoothed RTT and RTT-deviation slots for a request's timer class (proct[]) */
#define NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
	rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_long nfs_prog;
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;
extern u_long nfs_xidwrap;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 * Indexed by RPC procedure number; a nonzero entry selects the per-mount
 * smoothed-RTT slot used by NFS_RTO/NFS_SRTT/NFS_SDRTT above.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
155
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define NFS_CWNDSCALE	256
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* retransmit timeout backoff multipliers, indexed by timeout count */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int nfsrtton = 0;		/* nonzero enables round-trip-time logging */
struct nfsrtt nfsrtt;		/* circular log of per-RPC RTT samples */

/* forward declarations for this file's client socket helpers */
static int nfs_rcvlock(struct nfsreq *);
static void nfs_rcvunlock(struct nfsreq *);
static int nfs_receive(struct nfsreq *rep, mbuf_t *mp);
static int nfs_reconnect(struct nfsreq *rep);
static void nfs_repdequeue(struct nfsreq *rep);

/* XXX: prototypes for kernel interfaces not exported via a header */
boolean_t current_thread_aborted(void);
kern_return_t thread_terminate(thread_t);

#ifndef NFS_NOSERVER
static int nfsrv_getstream(struct nfssvc_sock *,int);

/* NFSv3 server-side dispatch table, indexed by RPC procedure number */
int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
				struct nfssvc_sock *slp,
				proc_t procp,
				mbuf_t *mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop
};
#endif /* NFS_NOSERVER */
217
218
219 /*
220 * attempt to bind a socket to a reserved port
221 */
222 static int
223 nfs_bind_resv(struct nfsmount *nmp)
224 {
225 socket_t so = nmp->nm_so;
226 struct sockaddr_in sin;
227 int error;
228 u_short tport;
229
230 if (!so)
231 return (EINVAL);
232
233 sin.sin_len = sizeof (struct sockaddr_in);
234 sin.sin_family = AF_INET;
235 sin.sin_addr.s_addr = INADDR_ANY;
236 tport = IPPORT_RESERVED - 1;
237 sin.sin_port = htons(tport);
238
239 while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
240 (--tport > IPPORT_RESERVED / 2))
241 sin.sin_port = htons(tport);
242 return (error);
243 }
244
/*
 * variables for managing the nfs_bind_resv_thread
 */
int nfs_resv_mounts = 0;		/* number of mounts using reserved ports */
static int nfs_bind_resv_thread_state = 0;
#define NFS_BIND_RESV_THREAD_STATE_INITTED	1	/* locks/queue set up, thread not running */
#define NFS_BIND_RESV_THREAD_STATE_RUNNING	2	/* helper thread is servicing requests */
lck_grp_t *nfs_bind_resv_lck_grp;
lck_grp_attr_t *nfs_bind_resv_lck_grp_attr;
lck_attr_t *nfs_bind_resv_lck_attr;
lck_mtx_t *nfs_bind_resv_mutex;		/* protects nfs_bind_resv_request_queue */
/* one bind request handed to the privileged helper thread; lives on the
 * requester's stack, so it must be dequeued before the requester returns */
struct nfs_bind_resv_request {
	TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;
	struct nfsmount *brr_nmp;	/* mount whose socket needs binding */
	int brr_error;			/* nfs_bind_resv() result, set by the thread */
};
static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;
262
/*
 * thread to handle any reserved port bind requests
 *
 * Runs as a kernel thread (with superuser credentials) so that
 * unprivileged processes can still get reserved-port binds done.
 * Loops while any reserved-port mounts exist, draining the request
 * queue and waking each requester, then sleeping on the queue.
 */
static void
nfs_bind_resv_thread(void)
{
	struct nfs_bind_resv_request *brreq;

	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;

	while (nfs_resv_mounts > 0) {
		lck_mtx_lock(nfs_bind_resv_mutex);
		while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
			TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
			/* drop the mutex across the (potentially blocking) bind */
			lck_mtx_unlock(nfs_bind_resv_mutex);
			brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
			wakeup(brreq);	/* requester sleeps on &brreq */
			lck_mtx_lock(nfs_bind_resv_mutex);
		}
		/* PDROP: msleep releases the mutex before blocking and does
		 * not reacquire it on wakeup */
		msleep((caddr_t)&nfs_bind_resv_request_queue,
			nfs_bind_resv_mutex, PSOCK | PDROP,
			"nfs_bind_resv_request_queue", 0);
	}

	/* no reserved-port mounts remain: mark initialized-but-idle and exit */
	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
	(void) thread_terminate(current_thread());
}
290
291 int
292 nfs_bind_resv_thread_wake(void)
293 {
294 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING)
295 return (EIO);
296 wakeup(&nfs_bind_resv_request_queue);
297 return (0);
298 }
299
300 /*
301 * underprivileged procs call this to request nfs_bind_resv_thread
302 * to perform the reserved port binding for them.
303 */
304 static int
305 nfs_bind_resv_nopriv(struct nfsmount *nmp)
306 {
307 struct nfs_bind_resv_request brreq;
308 int error;
309
310 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
311 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
312 nfs_bind_resv_lck_grp_attr = lck_grp_attr_alloc_init();
313 lck_grp_attr_setstat(nfs_bind_resv_lck_grp_attr);
314 nfs_bind_resv_lck_grp = lck_grp_alloc_init("nfs_bind_resv", nfs_bind_resv_lck_grp_attr);
315 nfs_bind_resv_lck_attr = lck_attr_alloc_init();
316 nfs_bind_resv_mutex = lck_mtx_alloc_init(nfs_bind_resv_lck_grp, nfs_bind_resv_lck_attr);
317 TAILQ_INIT(&nfs_bind_resv_request_queue);
318 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
319 }
320 kernel_thread(kernel_task, nfs_bind_resv_thread);
321 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
322 }
323
324 brreq.brr_nmp = nmp;
325 brreq.brr_error = 0;
326
327 lck_mtx_lock(nfs_bind_resv_mutex);
328 TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
329 lck_mtx_unlock(nfs_bind_resv_mutex);
330
331 error = nfs_bind_resv_thread_wake();
332 if (error) {
333 TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
334 /* Note: we might be able to simply restart the thread */
335 return (error);
336 }
337
338 tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);
339
340 return (brreq.brr_error);
341 }
342
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * Creates the socket, optionally binds it to a reserved port, connects
 * (unless NFSMNT_NOCONN on a datagram mount), sets send/receive timeouts
 * and buffer sizes, and seeds the per-mount RTT/congestion-window state.
 * On any failure, tears the socket down via nfs_disconnect() and returns
 * the error.
 */
int
nfs_connect(
	struct nfsmount *nmp,
	__unused struct nfsreq *rep)
{
	socket_t so;
	int error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct timeval timeo;

	nmp->nm_so = 0;
	saddr = mbuf_data(nmp->nm_nam);
	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
			nmp->nm_soproto, 0, 0, &nmp->nm_so);
	if (error) {
		goto bad;
	}
	so = nmp->nm_so;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		proc_t p;
		/*
		 * sobind() requires current_proc() to have superuser privs.
		 * If this bind is part of a reconnect, and the current proc
		 * doesn't have superuser privs, we hand the sobind() off to
		 * a kernel thread to process.
		 */
		if ((nmp->nm_state & NFSSTA_MOUNTED) &&
		    (p = current_proc()) && suser(kauth_cred_get(), 0)) {
			/* request nfs_bind_resv_thread() to do bind */
			error = nfs_bind_resv_nopriv(nmp);
		} else {
			error = nfs_bind_resv(nmp);
		}
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		struct timeval tv;
		tv.tv_sec = 2;
		tv.tv_usec = 0;
		/* start a non-blocking connect, then poll for completion */
		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
		if (error && error != EINPROGRESS) {
			goto bad;
		}

		while ((error = sock_connectwait(so, &tv)) == EINPROGRESS) {
			/* NOTE(review): rep is marked __unused but is checked
			 * here; nfs_sigintr() lets a signal abort the wait */
			if (rep && (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				goto bad;
			}
		}
	}

	/*
	 * Always time out on recieve, this allows us to reconnect the
	 * socket to deal with network changes.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 2;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		timeo.tv_sec = 5;
	} else {
		timeo.tv_sec = 0;	/* hard mount: sends may block indefinitely */
	}
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));

	/*
	 * Size the socket buffers from the transfer sizes plus header slop;
	 * receive side is scaled by the readahead count.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	} else {
		int proto;
		int on = 1;

		sock_gettype(so, NULL, NULL, &proto);
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");

		// Assume that SOCK_STREAM always requires a connection
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));

		if (proto == IPPROTO_TCP) {
			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
		}

		/* extra u_long per record for the RPC record mark */
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	}

	if (sndreserve > NFS_MAXSOCKBUF)
		sndreserve = NFS_MAXSOCKBUF;
	if (rcvreserve > NFS_MAXSOCKBUF)
		rcvreserve = NFS_MAXSOCKBUF;
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sndreserve, sizeof(sndreserve));
	if (error) {
		goto bad;
	}
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &rcvreserve, sizeof(rcvreserve));
	if (error) {
		goto bad;
	}

	sock_nointerrupt(so, 1);

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}
485
/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
static int
nfs_reconnect(struct nfsreq *rep)
{
	struct nfsreq *rp;
	struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	/* keep retrying the connect until it succeeds or is fatal */
	while ((error = nfs_connect(nmp, rep))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		/* report the mount as timed out while we retry */
		nfs_down(rep->r_nmp, rep->r_procp, error, NFSSTA_TIMEO,
			"can not connect");
		rep->r_flags |= R_TPRINTFMSG;
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			return (error);
		}
		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
			return (error);
		/* wait roughly a second (lbolt) before the next attempt */
		tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}
531
532 /*
533 * NFS disconnect. Clean up and unlink.
534 */
535 void
536 nfs_disconnect(struct nfsmount *nmp)
537 {
538 socket_t so;
539
540 if (nmp->nm_so) {
541 so = nmp->nm_so;
542 nmp->nm_so = 0;
543 sock_shutdown(so, 2);
544 sock_close(so);
545 }
546 }
547
548 /*
549 * This is the nfs send routine. For connection based socket types, it
550 * must be called with an nfs_sndlock() on the socket.
551 * "rep == NULL" indicates that it has been called from a server.
552 * For the client side:
553 * - return EINTR if the RPC is terminated, 0 otherwise
554 * - set R_MUSTRESEND if the send fails for any reason
555 * - do any cleanup required by recoverable socket errors (???)
556 * For the server side:
557 * - return EINTR or ERESTART if interrupted by a signal
558 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
559 * - do any cleanup required by recoverable socket errors (???)
560 */
561 int
562 nfs_send(so, nam, top, rep)
563 socket_t so;
564 mbuf_t nam;
565 mbuf_t top;
566 struct nfsreq *rep;
567 {
568 struct sockaddr *sendnam;
569 int error, error2, sotype, flags;
570 u_long xidqueued = 0;
571 struct nfsreq *rp;
572 char savenametolog[MAXPATHLEN];
573 struct msghdr msg;
574
575 if (rep) {
576 error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
577 if (error) {
578 mbuf_freem(top);
579 return (error);
580 }
581 if ((so = rep->r_nmp->nm_so) == NULL) {
582 rep->r_flags |= R_MUSTRESEND;
583 mbuf_freem(top);
584 return (0);
585 }
586 rep->r_flags &= ~R_MUSTRESEND;
587 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
588 if (rp == rep)
589 break;
590 if (rp)
591 xidqueued = rp->r_xid;
592 }
593 sock_gettype(so, NULL, &sotype, NULL);
594 if ((sotype == SOCK_STREAM) || (sock_isconnected(so)) ||
595 (nam == 0))
596 sendnam = (struct sockaddr *)0;
597 else
598 sendnam = mbuf_data(nam);
599
600 if (sotype == SOCK_SEQPACKET)
601 flags = MSG_EOR;
602 else
603 flags = 0;
604
605 /*
606 * Save the name here in case mount point goes away if we block.
607 * The name is using local stack and is large, but don't
608 * want to block if we malloc.
609 */
610 if (rep)
611 strncpy(savenametolog,
612 vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname,
613 MAXPATHLEN - 1);
614 bzero(&msg, sizeof(msg));
615 msg.msg_name = (caddr_t)sendnam;
616 msg.msg_namelen = sendnam == 0 ? 0 : sendnam->sa_len;
617 error = sock_sendmbuf(so, &msg, top, flags, NULL);
618
619 if (error) {
620 if (rep) {
621 if (xidqueued) {
622 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
623 if (rp == rep && rp->r_xid == xidqueued)
624 break;
625 if (!rp)
626 panic("nfs_send: error %d xid %x gone",
627 error, xidqueued);
628 }
629 log(LOG_INFO, "nfs send error %d for server %s\n",
630 error, savenametolog);
631 /*
632 * Deal with errors for the client side.
633 */
634 error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
635 if (error2) {
636 error = error2;
637 } else {
638 rep->r_flags |= R_MUSTRESEND;
639 }
640 } else
641 log(LOG_INFO, "nfsd send error %d\n", error);
642
643 /*
644 * Handle any recoverable (soft) socket errors here. (???)
645 */
646 if (error != EINTR && error != ERESTART && error != EIO &&
647 error != EWOULDBLOCK && error != EPIPE) {
648 error = 0;
649 }
650 }
651 return (error);
652 }
653
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp holds the assembled record; on failure *mp is NULL and
 * an errno is returned.  Reconnects the socket (and retries) on most
 * recoverable receive errors.
 */
static int
nfs_receive(struct nfsreq *rep, mbuf_t *mp)
{
	socket_t so;
	struct iovec_32 aio;
	mbuf_t m, mlast;
	u_long len, fraglen;
	int error, error2, sotype;
	proc_t p = current_proc();	/* XXX */
	struct msghdr msg;
	size_t rcvlen;
	int lastfragment;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
			nfs_sndunlock(rep);
			if (error)
				return (error);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			/* socket is gone: reconnect and restart the checks */
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep);
				return (error);
			}
			goto tryagain;
		}
		/* resend the request (copy of r_mreq) until it goes out */
		while (rep->r_flags & R_MUSTRESEND) {
			error = mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m);
			if (!error) {
				OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
				error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			}
			/*
			 * we also hold rcv lock so rep is still
			 * legit this point
			 */
			if (error) {
				if (error == EINTR || error == ERESTART ||
					(error = nfs_reconnect(rep))) {
					nfs_sndunlock(rep);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep);
		if (sotype == SOCK_STREAM) {
			error = 0;
			len = 0;
			lastfragment = 0;
			mlast = NULL;
			/* read RPC record fragments until the last-fragment bit */
			while (!error && !lastfragment) {
				/* first read the 4-byte record mark */
				aio.iov_base = (uintptr_t) &fraglen;
				aio.iov_len = sizeof(u_long);
				bzero(&msg, sizeof(msg));
				msg.msg_iov = (struct iovec *) &aio;
				msg.msg_iovlen = 1;
				do {
					error = sock_receive(so, &msg, MSG_WAITALL, &rcvlen);
					if (!rep->r_nmp) /* if unmounted then bailout */
						goto shutout;
					if (error == EWOULDBLOCK && rep) {
						error2 = nfs_sigintr(rep->r_nmp, rep, p);
						if (error2)
							error = error2;
					}
				} while (error == EWOULDBLOCK);
				if (!error && rcvlen < aio.iov_len) {
					/* only log a message if we got a partial word */
					if (rcvlen != 0)
						log(LOG_INFO,
							"short receive (%d/%d) from nfs server %s\n",
							rcvlen, sizeof(u_long),
							vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
					error = EPIPE;
				}
				if (error)
					goto errout;
				/* high bit of the record mark flags the final fragment */
				lastfragment = ntohl(fraglen) & 0x80000000;
				fraglen = ntohl(fraglen) & ~0x80000000;
				len += fraglen;
				/*
				 * This is SERIOUS! We are out of sync with the sender
				 * and forcing a disconnect/reconnect is all I can do.
				 */
				if (len > NFS_MAXPACKET) {
					log(LOG_ERR, "%s (%d) from nfs server %s\n",
						"impossible RPC record length", len,
						vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
					error = EFBIG;
					goto errout;
				}

				/* now read the fragment body into an mbuf chain */
				m = NULL;
				do {
					rcvlen = fraglen;
					error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &rcvlen);
					if (!rep->r_nmp) /* if unmounted then bailout */ {
						goto shutout;
					}
				} while (error == EWOULDBLOCK || error == EINTR ||
						error == ERESTART);

				if (!error && fraglen > rcvlen) {
					log(LOG_INFO,
						"short receive (%d/%d) from nfs server %s\n",
						rcvlen, fraglen,
						vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
					error = EPIPE;
					mbuf_freem(m);
				}
				if (!error) {
					/* append this fragment to the record in *mp */
					if (!*mp) {
						*mp = m;
						mlast = m;
					} else {
						error = mbuf_setnext(mlast, m);
						if (error) {
							printf("nfs_receive: mbuf_setnext failed %d\n", error);
							mbuf_freem(m);
						}
					}
					/* advance mlast to the tail of the chain */
					while (mbuf_next(mlast))
						mlast = mbuf_next(mlast);
				}
			}
		} else {
			/* connected non-stream (e.g. SOCK_SEQPACKET): whole records */
			bzero(&msg, sizeof(msg));
			do {
				rcvlen = 100000000;
				error = sock_receivembuf(so, &msg, mp, 0, &rcvlen);
				if (!rep->r_nmp) /* if unmounted then bailout */ {
					goto shutout;
				}
				if (error == EWOULDBLOCK && rep) {
					error2 = nfs_sigintr(rep->r_nmp, rep, p);
					if (error2) {
						return (error2);
					}
				}
			} while (error == EWOULDBLOCK);

			if ((msg.msg_flags & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len = rcvlen;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			/* unrecoverable receive error: drop partial data and
			 * force a reconnect, then retry from the top */
			mbuf_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
					"receive error %d from nfs server %s\n", error,
					vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname);
			error = nfs_sndlock(rep);
			if (!error) {
				error = nfs_reconnect(rep);
				if (!error)
					goto tryagain;
				nfs_sndunlock(rep);
			}
		}
	} else {
		/*
		 * We could have failed while rebinding the datagram socket
		 * so we need to attempt to rebind here.
		 */
		if ((so = rep->r_nmp->nm_so) == NULL) {
			error = nfs_sndlock(rep);
			if (!error) {
				error = nfs_reconnect(rep);
				nfs_sndunlock(rep);
			}
			if (error)
				return (error);
			if (!rep->r_nmp) /* if unmounted then bailout */
				return (ENXIO);
			so = rep->r_nmp->nm_so;
		}
		bzero(&msg, sizeof(msg));
		len = 0;
		do {
			rcvlen = 1000000;
			error = sock_receivembuf(so, &msg, mp, 0, &rcvlen);
			if (!rep->r_nmp) /* if unmounted then bailout */
				goto shutout;
			if (error) {
				error2 = nfs_sigintr(rep->r_nmp, rep, p);
				if (error2) {
					error = error2;
					goto shutout;
				}
			}
			/* Reconnect for all errors. We may be receiving
			 * soft/hard/blocking errors because of a network
			 * change.
			 * XXX: we should rate limit or delay this
			 * to once every N attempts or something.
			 * although TCP doesn't seem to.
			 */
			if (error) {
				error2 = nfs_sndlock(rep);
				if (!error2) {
					error2 = nfs_reconnect(rep);
					if (error2)
						error = error2;
					else if (!rep->r_nmp) /* if unmounted then bailout */
						error = ENXIO;
					else
						so = rep->r_nmp->nm_so;
					nfs_sndunlock(rep);
				} else {
					error = error2;
				}
			}
		} while (error == EWOULDBLOCK);
	}
shutout:
	if (error) {
		mbuf_freem(*mp);
		*mp = NULL;
	}
	return (error);
}
919
920 /*
921 * Implement receipt of reply on a socket.
922 * We must search through the list of received datagrams matching them
923 * with outstanding requests using the xid, until ours is found.
924 */
925 /* ARGSUSED */
926 int
927 nfs_reply(myrep)
928 struct nfsreq *myrep;
929 {
930 struct nfsreq *rep;
931 struct nfsmount *nmp = myrep->r_nmp;
932 long t1;
933 mbuf_t mrep, md;
934 u_long rxid, *tl;
935 caddr_t dpos, cp2;
936 int error;
937
938 /*
939 * Loop around until we get our own reply
940 */
941 for (;;) {
942 /*
943 * Lock against other receivers so that I don't get stuck in
944 * sbwait() after someone else has received my reply for me.
945 * Also necessary for connection based protocols to avoid
946 * race conditions during a reconnect.
947 * If nfs_rcvlock() returns EALREADY, that means that
948 * the reply has already been recieved by another
949 * process and we can return immediately. In this
950 * case, the lock is not taken to avoid races with
951 * other processes.
952 */
953 error = nfs_rcvlock(myrep);
954 if (error == EALREADY)
955 return (0);
956 if (error)
957 return (error);
958
959 /*
960 * If we slept after putting bits otw, then reply may have
961 * arrived. In which case returning is required, or we
962 * would hang trying to nfs_receive an already received reply.
963 */
964 if (myrep->r_mrep != NULL) {
965 nfs_rcvunlock(myrep);
966 FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
967 return (0);
968 }
969 /*
970 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
971 * is still intact by checks done in nfs_rcvlock.
972 */
973 error = nfs_receive(myrep, &mrep);
974 /*
975 * Bailout asap if nfsmount struct gone (unmounted).
976 */
977 if (!myrep->r_nmp) {
978 FSDBG(530, myrep->r_xid, myrep, nmp, -2);
979 if (mrep)
980 mbuf_freem(mrep);
981 return (ENXIO);
982 }
983 if (error) {
984 FSDBG(530, myrep->r_xid, myrep, nmp, error);
985 nfs_rcvunlock(myrep);
986
987 /* Bailout asap if nfsmount struct gone (unmounted). */
988 if (!myrep->r_nmp) {
989 if (mrep)
990 mbuf_freem(mrep);
991 return (ENXIO);
992 }
993
994 /*
995 * Ignore routing errors on connectionless protocols??
996 */
997 if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) {
998 if (nmp->nm_so) {
999 int clearerror;
1000 int optlen = sizeof(clearerror);
1001 sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
1002 }
1003 continue;
1004 }
1005 if (mrep)
1006 mbuf_freem(mrep);
1007 return (error);
1008 }
1009
1010 /*
1011 * We assume all is fine, but if we did not have an error
1012 * and mrep is 0, better not dereference it. nfs_receive
1013 * calls soreceive which carefully sets error=0 when it got
1014 * errors on sbwait (tsleep). In most cases, I assume that's
1015 * so we could go back again. In tcp case, EPIPE is returned.
1016 * In udp, case nfs_receive gets back here with no error and no
1017 * mrep. Is the right fix to have soreceive check for process
1018 * aborted after sbwait and return something non-zero? Should
1019 * nfs_receive give an EPIPE? Too risky to play with those
1020 * two this late in game for a shutdown problem. Instead,
1021 * just check here and get out. (ekn)
1022 */
1023 if (!mrep) {
1024 nfs_rcvunlock(myrep);
1025 FSDBG(530, myrep->r_xid, myrep, nmp, -3);
1026 return (ENXIO); /* sounds good */
1027 }
1028
1029 /*
1030 * Get the xid and check that it is an rpc reply
1031 */
1032 md = mrep;
1033 dpos = mbuf_data(md);
1034 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
1035 rxid = *tl++;
1036 if (*tl != rpc_reply) {
1037 OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid);
1038 mbuf_freem(mrep);
1039 nfsmout:
1040 if (nmp->nm_state & NFSSTA_RCVLOCK)
1041 nfs_rcvunlock(myrep);
1042 continue;
1043 }
1044
1045 /*
1046 * Loop through the request list to match up the reply
1047 * Iff no match, just drop the datagram
1048 */
1049 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1050 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1051 /* Found it.. */
1052 rep->r_mrep = mrep;
1053 rep->r_md = md;
1054 rep->r_dpos = dpos;
1055 /*
1056 * If we're tracking the round trip time
1057 * then we update the circular log here
1058 * with the stats from our current request.
1059 */
1060 if (nfsrtton) {
1061 struct rttl *rt;
1062
1063 rt = &nfsrtt.rttl[nfsrtt.pos];
1064 rt->proc = rep->r_procnum;
1065 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
1066 rt->sent = nmp->nm_sent;
1067 rt->cwnd = nmp->nm_cwnd;
1068 if (proct[rep->r_procnum] == 0)
1069 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1070 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
1071 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
1072 rt->fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
1073 microtime(&rt->tstamp); // XXX unused
1074 if (rep->r_flags & R_TIMING)
1075 rt->rtt = rep->r_rtt;
1076 else
1077 rt->rtt = 1000000;
1078 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
1079 }
1080 /*
1081 * Update congestion window.
1082 * Do the additive increase of
1083 * one rpc/rtt.
1084 */
1085 FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
1086 nmp->nm_cwnd);
1087 if (nmp->nm_cwnd <= nmp->nm_sent) {
1088 nmp->nm_cwnd +=
1089 (NFS_CWNDSCALE * NFS_CWNDSCALE +
1090 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
1091 if (nmp->nm_cwnd > NFS_MAXCWND)
1092 nmp->nm_cwnd = NFS_MAXCWND;
1093 }
1094 if (rep->r_flags & R_SENT) {
1095 rep->r_flags &= ~R_SENT;
1096 nmp->nm_sent -= NFS_CWNDSCALE;
1097 }
1098 /*
1099 * Update rtt using a gain of 0.125 on the mean
1100 * and a gain of 0.25 on the deviation.
1101 */
1102 if (rep->r_flags & R_TIMING) {
1103 /*
1104 * Since the timer resolution of
1105 * NFS_HZ is so course, it can often
1106 * result in r_rtt == 0. Since
1107 * r_rtt == N means that the actual
1108 * rtt is between N+dt and N+2-dt ticks,
1109 * add 1.
1110 */
1111 if (proct[rep->r_procnum] == 0)
1112 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1113 t1 = rep->r_rtt + 1;
1114 t1 -= (NFS_SRTT(rep) >> 3);
1115 NFS_SRTT(rep) += t1;
1116 if (t1 < 0)
1117 t1 = -t1;
1118 t1 -= (NFS_SDRTT(rep) >> 2);
1119 NFS_SDRTT(rep) += t1;
1120 }
1121 nmp->nm_timeouts = 0;
1122 break;
1123 }
1124 }
1125 nfs_rcvunlock(myrep);
1126 /*
1127 * If not matched to a request, drop it.
1128 * If it's mine, get out.
1129 */
1130 if (rep == 0) {
1131 OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected);
1132 mbuf_freem(mrep);
1133 } else if (rep == myrep) {
1134 if (rep->r_mrep == NULL)
1135 panic("nfs_reply: nil r_mrep");
1136 return (0);
1137 }
1138 FSDBG(530, myrep->r_xid, myrep, rep,
1139 rep ? rep->r_xid : myrep->r_flags);
1140 }
1141 }
1142
/*
 * nfs_request - perform one synchronous NFS RPC.  Goes something like this:
 * - fill in request struct
 * - links it into list
 * - calls nfs_send() for first transmit
 * - calls nfs_receive() to get reply
 * - break down rpc header and return with nfs reply pointed to
 *   by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * vp/mp identify the mount (vp takes precedence when non-NULL); mrest is
 * the caller-built mbuf chain of NFS call arguments (always consumed).
 * On success *mrp/*mdp/*dposp describe the reply (caller frees *mrp);
 * *xidp, if non-NULL, receives the 64-bit transaction id.
 *
 * NOTE: the mount can be force-unmounted while we sleep, so nmp is
 * re-fetched from vp/rep after every potentially-blocking call and
 * checked for NULL before use.
 */
int
nfs_request(vp, mp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
	vnode_t vp;
	mount_t mp;
	mbuf_t mrest;
	int procnum;
	proc_t procp;
	kauth_cred_t cred;
	mbuf_t *mrp;
	mbuf_t *mdp;
	caddr_t *dposp;
	u_int64_t *xidp;
{
	mbuf_t m, mrep, m2;
	struct nfsreq re, *rep;		/* request record lives on our stack */
	u_long *tl;
	int i;
	struct nfsmount *nmp;
	mbuf_t md, mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	caddr_t dpos, cp2;
	int t1, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_long xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	int nmsotype;
	struct timeval now;

	if (mrp)
		*mrp = NULL;
	if (xidp)
		*xidp = 0;
	nmp = VFSTONFS(mp);

	rep = &re;

	if (vp)
		nmp = VFSTONFS(vnode_mount(vp));
	/* bail out now if the mount is gone or force-unmounting after a timeout */
	if (nmp == NULL ||
	    (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		mbuf_freem(mrest);
		return (ENXIO);
	}
	nmsotype = nmp->nm_sotype;

	FSDBG_TOP(531, vp, procnum, nmp, rep);

	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	microuptime(&now);
	/* back-date last-message time so the first "not responding" msg uses the initial delay */
	rep->r_lastmsg = now.tv_sec -
	    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	/* total byte length of the argument mbuf chain */
	i = 0;
	m = mrest;
	while (m) {
		i += mbuf_len(m);
		m = mbuf_next(m);
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
	if (!nmp) {
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		mbuf_freem(mrest);
		return (ENXIO);
	}
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		bzero((caddr_t)key, sizeof (key));
		/* try the cached nickname auth first; fall back to full getauth */
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
			if (!nmp) {
				FSDBG_BOT(531, 2, vp, error, rep);
				mbuf_freem(mrest);
				return (ENXIO);
			}
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
			if (!error && !nmp)
				error = ENXIO;
			if (error) {
				FSDBG_BOT(531, 2, vp, error, rep);
				mbuf_freem(mrest);
				return (error);
			}
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		/* AUTH_UNIX length: clamp group count to nm_numgrps */
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	/* prepend the RPC header; consumes mrest on success */
	error = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid, &m);
	if (auth_str)
		_FREE(auth_str, M_TEMP);
	if (error) {
		mbuf_freem(mrest);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}
	if (xidp)
		*xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmsotype == SOCK_STREAM) {
		error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
		if (error) {
			mbuf_freem(m);
			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
			return (error);
		}
		/* high bit set => last fragment; low 31 bits = fragment length */
		*((u_long*)mbuf_data(m)) =
			htonl(0x80000000 | (mbuf_pkthdr_len(m) - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
	if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests);
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		   (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		   nmp->nm_sent < nmp->nm_cwnd)) {
		int connrequired = (nmp->nm_sotype == SOCK_STREAM);

		if (connrequired)
			error = nfs_sndlock(rep);

		/*
		 * Set the R_SENT before doing the send in case another thread
		 * processes the reply before the nfs_send returns here
		 */
		if (!error) {
			if ((rep->r_flags & R_MUSTRESEND) == 0) {
				FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
				      nmp->nm_cwnd);
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}

			/* send a copy; keep r_mreq for possible retransmits */
			error = mbuf_copym(m, 0, MBUF_COPYALL, MBUF_WAITOK, &m2);
			if (!error)
				error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
			if (connrequired)
				nfs_sndunlock(rep);
		}
		nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;
		if (error) {
			/* send failed: back out of the congestion window */
			if (nmp)
				nmp->nm_sent -= NFS_CWNDSCALE;
			rep->r_flags &= ~R_SENT;
		}
	} else {
		rep->r_rtt = -1;	/* not sent yet; timer will send it */
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	nfs_repdequeue(rep);

	nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp;

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		if (nmp) {
			FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
			nmp->nm_sent -= NFS_CWNDSCALE;
		}
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error)
		nfs_up(nmp, procp, NFSSTA_TIMEO,
		    (rep->r_flags & R_TPRINTFMSG) ? "is alive again" : NULL);
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (!error && !nmp)
		error = ENXIO;
	if (error) {
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			/* first auth failure: detach the args and retry the auth once */
			if (!failed_auth) {
				failed_auth++;
				error = mbuf_setnext(mheadend, NULL);
				mbuf_freem(mrep);
				mbuf_freem(rep->r_mreq);
				if (!error)
					goto kerbauth;
				printf("nfs_request: mbuf_setnext failed\n");
			} else
				error = EAUTH;
		} else
			error = EACCES;
		mbuf_freem(mrep);
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			/* NFSv3 JUKEBOX: back off (doubling, capped at 60s) and resend */
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
				error == NFSERR_TRYLATER) {
				mbuf_freem(mrep);
				error = 0;
				microuptime(&now);
				waituntil = now.tv_sec + trylater_delay;
				while (now.tv_sec < waituntil) {
					tsleep((caddr_t)&lbolt, PSOCK, "nfstrylater", 0);
					microuptime(&now);
				}
				trylater_delay *= 2;
				if (trylater_delay > 60)
					trylater_delay = 60;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if ((error == ESTALE) && vp)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				/* v3: hand the reply back so the caller can parse post-op attrs */
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else {
				mbuf_freem(mrep);
				error &= ~NFSERR_RETERR;
			}
			mbuf_freem(rep->r_mreq);
			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
			return (error);
		}

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		mbuf_freem(rep->r_mreq);
		FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
		return (0);
	}
	mbuf_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	mbuf_freem(rep->r_mreq);
	FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
	return (error);
}
1488
1489 #ifndef NFS_NOSERVER
/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 *
 * Builds the XDR reply header for the request described by nd into a
 * fresh mbuf (cluster if the reply will be big).  On success returns 0
 * with *mrq (if non-NULL) set to the new mbuf, *mbp the mbuf to keep
 * appending to, and *bposp the current fill position within it.
 */
int
nfs_rephead(siz, nd, slp, err, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	mbuf_t *mrq;
	mbuf_t *mbp;
	caddr_t *bposp;
{
	u_long *tl;
	mbuf_t mreq;
	caddr_t bpos;
	mbuf_t mb, mb2;
	int error, mlen;

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mreq);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX nfsstat? */
		return (error);
	}
	mb = mreq;
	tl = mbuf_data(mreq);
	mlen = 6 * NFSX_UNSIGNED;
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl += 80/sizeof(*tl);	/* XXX max_hdr? XXX */
		mbuf_setdata(mreq, tl, mlen);
	} else {
		mbuf_setlen(mreq, mlen);
	}
	bpos = ((caddr_t)tl) + mlen;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			/* auth-error reply is one word shorter than the 6 reserved */
			mlen -= NFSX_UNSIGNED;
			mbuf_setlen(mreq, mlen);
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			/* low and high supported RPC versions (both 2) */
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;
			uid_t uid = kauth_cred_getuid(nd->nd_cr);

			/* look up the cached credential for this uid/address */
			lck_rw_lock_shared(&slp->ns_rwlock);
			for (nuidp = NUIDHASH(slp, uid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (kauth_cred_getuid(nuidp->nu_cr) == uid &&
				    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
				     &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				/*
				 * Encrypt the timestamp in ecb mode using the
				 * session key.
				 */
#if NFSKERB
				XXX
#endif

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				/* NOTE(review): nfsm_build here grows the reply and
				 * repositions tl before tv_usec/uid are stored —
				 * verify against nfsm_build's allocation semantics. */
				nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(kauth_cred_getuid(nuidp->nu_cr));
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
			lck_rw_done(&slp->ns_rwlock);
		} else {
			/* RPCAUTH_NULL verifier: flavor 0, length 0 */
			*tl++ = 0;
			*tl++ = 0;
		}
		/* accept-state word (and optional status word) */
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
			// XXX hard coded versions
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(3);
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;	/* RPC_SUCCESS */
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	if (mrq != NULL)
		*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID) {
		OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs);
	}
	return (0);
}
1639
1640
1641 #endif /* NFS_NOSERVER */
1642
1643
1644 /*
1645 * From FreeBSD 1.58, a Matt Dillon fix...
1646 * Flag a request as being about to terminate.
1647 * The nm_sent count is decremented now to avoid deadlocks when the process
1648 * in soreceive() hasn't yet managed to send its own request.
1649 */
1650 static void
1651 nfs_softterm(struct nfsreq *rep)
1652 {
1653
1654 rep->r_flags |= R_SOFTTERM;
1655 if (rep->r_flags & R_SENT) {
1656 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1657 rep->r_nmp->nm_cwnd);
1658 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1659 rep->r_flags &= ~R_SENT;
1660 }
1661 }
1662
/*
 * Funnel-wrapped entry point for the NFS timer: take the kernel funnel,
 * run nfs_timer(), then release the funnel.  Scheduled via timeout()
 * at the end of nfs_timer().
 */
void
nfs_timer_funnel(void * arg)
{
	(void) thread_funnel_set(kernel_flock, TRUE);
	nfs_timer(arg);
	(void) thread_funnel_set(kernel_flock, FALSE);

}
1671
1672 /*
1673 * Ensure rep isn't in use by the timer, then dequeue it.
1674 */
1675 static void
1676 nfs_repdequeue(struct nfsreq *rep)
1677 {
1678
1679 while ((rep->r_flags & R_BUSY)) {
1680 rep->r_flags |= R_WAITING;
1681 tsleep(rep, PSOCK, "repdeq", 0);
1682 }
1683 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1684 }
1685
1686 /*
1687 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
1688 * free()'d out from under it.
1689 */
1690 static void
1691 nfs_repbusy(struct nfsreq *rep)
1692 {
1693
1694 if ((rep->r_flags & R_BUSY))
1695 panic("rep locked");
1696 rep->r_flags |= R_BUSY;
1697 }
1698
1699 /*
1700 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
1701 */
1702 static struct nfsreq *
1703 nfs_repnext(struct nfsreq *rep)
1704 {
1705 struct nfsreq * nextrep;
1706
1707 if (rep == NULL)
1708 return (NULL);
1709 /*
1710 * We need to get and busy the next req before signalling the
1711 * current one, otherwise wakeup() may block us and we'll race to
1712 * grab the next req.
1713 */
1714 nextrep = TAILQ_NEXT(rep, r_chain);
1715 if (nextrep != NULL)
1716 nfs_repbusy(nextrep);
1717 /* unbusy and signal. */
1718 rep->r_flags &= ~R_BUSY;
1719 if ((rep->r_flags & R_WAITING)) {
1720 rep->r_flags &= ~R_WAITING;
1721 wakeup(rep);
1722 }
1723 return (nextrep);
1724 }
1725
/*
 * Nfs timer routine
 * Scan the nfsreq list and retranmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 *
 * Also wakes nfsds for gathered writes whose deadline has passed, does
 * periodic nfs buffer cleanup, and re-arms itself via timeout().
 */
void
nfs_timer(__unused void *arg)
{
	struct nfsreq *rep;
	mbuf_t m;
	socket_t so;
	struct nfsmount *nmp;
	int timeo;
	int error;
#ifndef NFS_NOSERVER
	struct nfssvc_sock *slp;
	u_quad_t cur_usec;
#endif /* NFS_NOSERVER */
	int flags, rexmit, cwnd, sent;
	u_long xid;
	struct timeval now;

	/* walk the outstanding-request queue, busying entries as we go */
	rep = TAILQ_FIRST(&nfs_reqq);
	if (rep != NULL)
		nfs_repbusy(rep);
	microuptime(&now);
	for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
		nmp = rep->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp))
			continue;
		/* rate-limited "server not responding" console message */
		if (nmp->nm_tprintf_initial_delay != 0 &&
		    (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
		    rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
			rep->r_lastmsg = now.tv_sec;
			nfs_down(rep->r_nmp, rep->r_procp, 0, NFSSTA_TIMEO,
				"not responding");
			rep->r_flags |= R_TPRINTFMSG;
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
				nfs_softterm(rep);
				continue;
			}
		}
		if (rep->r_rtt >= 0) {
			/* request is in flight: advance its rtt clock and
			 * decide whether it has timed out */
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			/* ensure 62.5 ms floor */
			while (16 * timeo < hz)
				timeo *= 2;
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for too many retransmits. This is never true for
		 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
		 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
		 */
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts);
			nfs_softterm(rep);
			continue;
		}
		/* no timer-driven retransmits on stream sockets; just clamp the count */
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		     (rep->r_flags & R_SENT) ||
		     nmp->nm_sent < nmp->nm_cwnd) &&
		    (mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_DONTWAIT, &m) == 0)){
			struct msghdr msg;
			/*
			 * Iff first send, start timing
			 * else turn timing off, backoff timer
			 * and divide congestion window by 2.
			 * We update these *before* the send to avoid
			 * racing against receiving the reply.
			 * We save them so we can restore them on send error.
			 */
			flags = rep->r_flags;
			rexmit = rep->r_rexmit;
			cwnd = nmp->nm_cwnd;
			sent = nmp->nm_sent;
			xid = rep->r_xid;
			if (rep->r_flags & R_SENT) {
				/* retransmit: stop timing, halve the window */
				rep->r_flags &= ~R_TIMING;
				if (++rep->r_rexmit > NFS_MAXREXMIT)
					rep->r_rexmit = NFS_MAXREXMIT;
				nmp->nm_cwnd >>= 1;
				if (nmp->nm_cwnd < NFS_CWNDSCALE)
					nmp->nm_cwnd = NFS_CWNDSCALE;
				OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries);
			} else {
				rep->r_flags |= R_SENT;
				nmp->nm_sent += NFS_CWNDSCALE;
			}
			FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);

			bzero(&msg, sizeof(msg));
			/* unconnected socket: supply the destination address */
			if ((nmp->nm_flag & NFSMNT_NOCONN) == NFSMNT_NOCONN) {
				msg.msg_name = mbuf_data(nmp->nm_nam);
				msg.msg_namelen = mbuf_len(nmp->nm_nam);
			}
			error = sock_sendmbuf(so, &msg, m, MSG_DONTWAIT, NULL);

			FSDBG(535, xid, error, sent, cwnd);

			if (error) {
				if (error == EWOULDBLOCK) {
					/* socket full: restore saved state, try again later */
					rep->r_flags = flags;
					rep->r_rexmit = rexmit;
					nmp->nm_cwnd = cwnd;
					nmp->nm_sent = sent;
					rep->r_xid = xid;
				}
				else {
					if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) {
						int clearerror;
						int optlen = sizeof(clearerror);
						sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
					}
					/* restore state and remember the resend failed */
					rep->r_flags = flags | R_RESENDERR;
					rep->r_rexmit = rexmit;
					nmp->nm_cwnd = cwnd;
					nmp->nm_sent = sent;
					if (flags & R_SENT)
						OSAddAtomic(-1, (SInt32*)&nfsstats.rpcretries);
				}
			} else
				rep->r_rtt = 0;
		}
	}
	microuptime(&now);
#ifndef NFS_NOSERVER
	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
	lck_mtx_lock(nfsd_mutex);
	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
		if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec))
			nfsrv_wakenfsd(slp);
	}
	lck_mtx_unlock(nfsd_mutex);
#endif /* NFS_NOSERVER */

	if (nfsbuffreeuptimestamp + 30 <= now.tv_sec) {
		/*
		 * We haven't called nfs_buf_freeup() in a little while.
		 * So, see if we can free up any stale/unused bufs now.
		 */
		nfs_buf_freeup(1);
	}

	/* re-arm ourselves for the next tick */
	timeout(nfs_timer_funnel, (void *)0, nfs_ticks);

}
1908
1909
/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * EIO is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 * Returns 0 when the request may keep waiting.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	proc_t p;
{
	sigset_t pending_sigs;
	int context_good = 0;
	struct nfsmount *repnmp;
	extern proc_t kernproc;

	if (nmp == NULL)
		return (ENXIO);
	if (rep != NULL) {
		repnmp = rep->r_nmp;
		/* we've had a forced unmount. */
		if (repnmp == NULL)
			return (ENXIO);
		/* request has timed out on a 'soft' mount. */
		if (rep->r_flags & R_SOFTTERM)
			return (EIO);
		/*
		 * We're in the progress of a force unmount and there's
		 * been a timeout we're dead and fail IO.
		 */
		if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
		    (NFSSTA_FORCE|NFSSTA_TIMEO))
			return (EIO);
		/* Someone is unmounting us, go soft and mark it. */
		/* NOTE(review): flag is set on repnmp but state on nmp —
		 * callers appear to pass the same mount for both; confirm. */
		if (repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
			repnmp->nm_flag |= NFSMNT_SOFT;
			nmp->nm_state |= NFSSTA_FORCE;
		}
		/*
		 * If the mount is hung and we've requested not to hang
		 * on remote filesystems, then bail now.
		 */
		if (p != NULL && (proc_noremotehang(p)) != 0 &&
		    (repnmp->nm_state & NFSSTA_TIMEO) != 0)
			return (EIO);
	}
	/* XXX: is this valid? this probably should be an assertion. */
	if (p == NULL)
		return (0);

	/* Is this thread belongs to kernel task; then abort check is not needed */
	if ((current_proc() != kernproc) && current_thread_aborted()) {
		return (EINTR);
	}
	/* mask off thread and process blocked signals. */

	pending_sigs = proc_pendingsignals(p, NFSINT_SIGMASK);
	if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
		return (EINTR);
	return (0);
}
1974
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 *
 * Returns 0 with NFSSTA_SNDLOCK held, or an errno (ENXIO if the mount
 * vanished, or whatever nfs_sigintr reports) without the lock.
 */
int
nfs_sndlock(rep)
	struct nfsreq *rep;
{
	int *statep;
	proc_t p;
	int error, slpflag = 0, slptimeo = 0;

	if (rep->r_nmp == NULL)
		return (ENXIO);
	statep = &rep->r_nmp->nm_state;

	p = rep->r_procp;
	if (rep->r_nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;	/* interruptible mount: catch signals */
	while (*statep & NFSSTA_SNDLOCK) {
		error = nfs_sigintr(rep->r_nmp, rep, p);
		if (error)
			return (error);
		*statep |= NFSSTA_WANTSND;
		/* poll (hz timeout) if the process asked not to hang on remote fs */
		if (p != NULL && (proc_noremotehang(p)) != 0)
			slptimeo = hz;
		tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsndlck", slptimeo);
		/* after one interruptible sleep, fall back to periodic polling */
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
		/*
		 * Make sure while we slept that the mountpoint didn't go away.
		 * nfs_sigintr and callers expect it in tact.
		 */
		if (!rep->r_nmp)
			return (ENXIO); /* don't have lock until out of loop */
	}
	*statep |= NFSSTA_SNDLOCK;
	return (0);
}
2018
2019 /*
2020 * Unlock the stream socket for others.
2021 */
2022 void
2023 nfs_sndunlock(rep)
2024 struct nfsreq *rep;
2025 {
2026 int *statep;
2027
2028 if (rep->r_nmp == NULL)
2029 return;
2030 statep = &rep->r_nmp->nm_state;
2031 if ((*statep & NFSSTA_SNDLOCK) == 0)
2032 panic("nfs sndunlock");
2033 *statep &= ~NFSSTA_SNDLOCK;
2034 if (*statep & NFSSTA_WANTSND) {
2035 *statep &= ~NFSSTA_WANTSND;
2036 wakeup((caddr_t)statep);
2037 }
2038 }
2039
/*
 * Lock the receive side of the socket for the request rep.
 * Returns 0 with NFSSTA_RCVLOCK held, EALREADY if the reply has already
 * arrived (no lock taken), or an errno from nfs_sigintr / ENXIO if the
 * mount went away while we slept.
 */
static int
nfs_rcvlock(struct nfsreq *rep)
{
	int *statep;
	int error, slpflag, slptimeo = 0;

	/* make sure we still have our mountpoint */
	if (!rep->r_nmp) {
		if (rep->r_mrep != NULL)
			return (EALREADY);
		return (ENXIO);
	}

	statep = &rep->r_nmp->nm_state;
	FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
	if (rep->r_nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;	/* interruptible mount: catch signals */
	else
		slpflag = 0;
	while (*statep & NFSSTA_RCVLOCK) {
		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
			return (error);
		} else if (rep->r_mrep != NULL) {
			/*
			 * Don't bother sleeping if reply already arrived
			 */
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
			return (EALREADY);
		}
		FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
		*statep |= NFSSTA_WANTRCV;
		/*
		 * We need to poll if we're P_NOREMOTEHANG so that we
		 * call nfs_sigintr periodically above.
		 */
		if (rep->r_procp != NULL &&
		    (proc_noremotehang(rep->r_procp)) != 0)
			slptimeo = hz;
		tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo);
		/* after one interruptible sleep, fall back to periodic polling */
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
		/*
		 * Make sure while we slept that the mountpoint didn't go away.
		 * nfs_sigintr and caller nfs_reply expect it intact.
		 */
		if (!rep->r_nmp) {
			FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
			return (ENXIO); /* don't have lock until out of loop */
		}
	}
	/*
	 * nfs_reply will handle it if reply already arrived.
	 * (We may have slept or been preempted).
	 */
	FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
	*statep |= NFSSTA_RCVLOCK;
	return (0);
}
2101
2102 /*
2103 * Unlock the stream socket for others.
2104 */
2105 static void
2106 nfs_rcvunlock(struct nfsreq *rep)
2107 {
2108 int *statep;
2109
2110 if (rep->r_nmp == NULL)
2111 return;
2112 statep = &rep->r_nmp->nm_state;
2113
2114 FSDBG(533, statep, *statep, 0, 0);
2115 if ((*statep & NFSSTA_RCVLOCK) == 0)
2116 panic("nfs rcvunlock");
2117 *statep &= ~NFSSTA_RCVLOCK;
2118 if (*statep & NFSSTA_WANTRCV) {
2119 *statep &= ~NFSSTA_WANTRCV;
2120 wakeup((caddr_t)statep);
2121 }
2122 }
2123
2124
2125 #ifndef NFS_NOSERVER
2126 /*
2127 * Socket upcall routine for the nfsd sockets.
2128 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2129 * Essentially do as much as possible non-blocking, else punt and it will
2130 * be called with MBUF_WAITOK from an nfsd.
2131 */
2132 void
2133 nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
2134 {
2135 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2136
2137 if (!nfs_numnfsd || !(slp->ns_flag & SLP_VALID))
2138 return;
2139
2140 lck_rw_lock_exclusive(&slp->ns_rwlock);
2141 nfsrv_rcv_locked(so, slp, waitflag);
2142 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
2143 }
/*
 * Do the actual receive work for an nfsd socket; called with slp->ns_rwlock
 * held exclusive.  For stream sockets, appends raw data to ns_raw and parses
 * RPC records out of it; for datagram sockets, queues each packet (with its
 * source address) onto ns_rec.  When called with MBUF_DONTWAIT this routine
 * drops ns_rwlock before returning and may wake an nfsd.
 */
void
nfsrv_rcv_locked(socket_t so, struct nfssvc_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == MBUF_DONTWAIT) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;	/* effectively "read everything available" */
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = SLP_NEEDQ;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		/* append the new data to the raw stream buffer */
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		/* advance ns_rawend to the last mbuf of the chain */
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage	nam;

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		/* drain all queued datagrams */
		do {
			bytes_read = 1000000000;	/* effectively "read everything available" */
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				/* prepend the sender's address as a SONAME mbuf */
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				/* queue the packet as one record on ns_rec */
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
			}
#if 0
			if (error) {
				/*
				 * This may be needed in the future to support
				 * non-byte-stream connection-oriented protocols
				 * such as SCTP.
				 */
				/*
				 * This (slp->ns_sotype == SOCK_STREAM) should really
				 * be a check for PR_CONNREQUIRED.
				 */
				if ((slp->ns_sotype == SOCK_STREAM)
					&& error != EWOULDBLOCK) {
					ns_flag = SLP_DISCONN;
					goto dorecs;
				}
			}
#endif
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		/* drop the lock; wake an nfsd if there is work queued */
		int wake = (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)));
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfs_numnfsd) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}
2281
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Repeatedly reads a 4-byte RPC record mark (high bit = last fragment,
 * low 31 bits = fragment length) off the raw stream, splits that many
 * bytes off the front of ns_raw, and accumulates fragments on ns_frag
 * until the last-fragment bit completes a record, which is then moved to
 * the ns_rec request queue.  Returns 0 when out of data, EPERM on an
 * insane record length (caller disconnects), or EWOULDBLOCK if an mbuf
 * operation fails.
 */
static int
nfsrv_getstream(slp, waitflag)
	struct nfssvc_sock *slp;
	int waitflag;
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_long recmark;

	/* Serialize: only one thread may be parsing this socket's stream. */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/*
			 * Start of a new fragment: read the 4-byte record mark.
			 * Wait for more data if it isn't all here yet.
			 */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				/* Mark is contiguous in the first mbuf. */
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/* Mark straddles mbufs: gather it byte by byte. */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			/* High bit flags the last fragment; low 31 bits are the length. */
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				/* Insane length: EPERM makes the caller disconnect. */
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* The whole raw chain is exactly this fragment. */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* Fragment ends mid-chain: split it off the front. */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/* Boundary falls inside this mbuf: copy out the head. */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* Boundary falls exactly at the end of this mbuf. */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					/*
					 * NOTE(review): ns_cc > ns_reclen here implies more data
					 * follows, so m should be non-NULL -- but if the ns_cc
					 * byte accounting ever disagreed with the chain,
					 * mbuf_len(NULL) below would fault.  Verify the invariant.
					 */
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* Whole mbuf belongs to this fragment: keep walking. */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* Fragment not complete yet; wait for more data. */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* Record complete: move it onto the request queue. */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else
				slp->ns_rec = slp->ns_frag;
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
2431
2432 /*
2433 * Parse an RPC header.
2434 */
2435 int
2436 nfsrv_dorec(slp, nfsd, ndp)
2437 struct nfssvc_sock *slp;
2438 struct nfsd *nfsd;
2439 struct nfsrv_descript **ndp;
2440 {
2441 mbuf_t m;
2442 mbuf_t nam;
2443 struct nfsrv_descript *nd;
2444 int error;
2445
2446 *ndp = NULL;
2447 if ((slp->ns_flag & SLP_VALID) == 0 || (slp->ns_rec == NULL))
2448 return (ENOBUFS);
2449 MALLOC_ZONE(nd, struct nfsrv_descript *,
2450 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2451 if (!nd)
2452 return (ENOMEM);
2453 m = slp->ns_rec;
2454 slp->ns_rec = mbuf_nextpkt(m);
2455 if (slp->ns_rec)
2456 mbuf_setnextpkt(m, NULL);
2457 else
2458 slp->ns_recend = NULL;
2459 if (mbuf_type(m) == MBUF_TYPE_SONAME) {
2460 nam = m;
2461 m = mbuf_next(m);
2462 if ((error = mbuf_setnext(nam, NULL)))
2463 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
2464 } else
2465 nam = NULL;
2466 nd->nd_md = nd->nd_mrep = m;
2467 nd->nd_nam2 = nam;
2468 nd->nd_dpos = mbuf_data(m);
2469 error = nfs_getreq(nd, nfsd, TRUE);
2470 if (error) {
2471 if (nam)
2472 mbuf_freem(nam);
2473 FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
2474 return (error);
2475 }
2476 *ndp = nd;
2477 nfsd->nfsd_nd = nd;
2478 return (0);
2479 }
2480
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * Returns 0 with nd->nd_repstat/nd_procnum set (possibly to an RPC-level
 * error + NFSPROC_NOOP) when a reply should still be sent, or a non-zero
 * errno (e.g. EBADRPC) when the request mbufs have been freed and no
 * reply is possible.  On success nd->nd_cr holds a newly created
 * credential that the caller must release.
 *
 * Note: the nfsm_dissect/nfsm_adv/nfsm_mtouio macros advance md/dpos
 * through the mbuf chain and `goto nfsmout' (freeing mrep, returning
 * `error') if the data runs out.
 */
int
nfs_getreq(nd, nfsd, has_header)
	struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	int len, i;
	u_long *tl;
	long t1;
	uio_t uiop;
	caddr_t dpos, cp2, cp;
	u_long nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, ticklen;
	mbuf_t mrep, md;
	struct nfsuid *nuidp;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	struct ucred temp_cred;
	struct timeval tvin, tvout, now;
	char uio_buf[ UIO_SIZEOF(1) ];
#if 0 /* until encrypted keys are implemented */
	NFSKERBKEYSCHED_T keys; /* stores key schedule */
#endif

	nd->nd_cr = NULL;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	/* With a header present, the first two words are XID and CALL. */
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	/* Validate RPC version, program number, NFS version, procedure. */
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_long, *tl++);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Map v2 procedure numbers onto the v3 table. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		mbuf_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* Skip the machine name, then pull uid, gid, and gid list. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		bzero(&temp_cred, sizeof(temp_cred));
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
		user_id = fxdr_unsigned(uid_t, *tl++);
		group_id = fxdr_unsigned(gid_t, *tl++);
		temp_cred.cr_groups[0] = group_id;
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		/* Copy at most NGROUPS-1 supplementary gids; skip the rest. */
		nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				temp_cred.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrvw_sort(&temp_cred.cr_groups[0], ngroups);
		/* Verifier length: bounded, then skipped below. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			mbuf_freem(mrep);
			return (EBADRPC);
		}
		temp_cred.cr_uid = user_id;
		temp_cred.cr_ngroups = ngroups;
		nd->nd_cr = kauth_cred_create(&temp_cred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			/*
			 * Full-name ticket: stash the ticket in nfsd_authstr and
			 * defer verification to a user-level daemon (NFSD_NEEDAUTH).
			 */
			ticklen = fxdr_unsigned(int, *tl);
			*((u_long *)nfsd->nfsd_authstr) = *tl;
			uiop = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
				&uio_buf[0], sizeof(uio_buf));
			if (!uiop) {
				nd->nd_repstat = ENOMEM;
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			// LP64todo - fix this
			nfsd->nfsd_authlen = (nfsm_rndup(ticklen) + (NFSX_UNSIGNED * 2));
			if ((nfsm_rndup(ticklen) + NFSX_UNSIGNED) > (len - 2 * NFSX_UNSIGNED)) {
				mbuf_freem(mrep);
				return (EBADRPC);
			}
			uio_addiov(uiop, CAST_USER_ADDR_T(&nfsd->nfsd_authstr[4]), RPCAUTH_MAXSIZ - 4);
			// LP64todo - fix this
			nfsm_mtouio(uiop, uio_resid(uiop));
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_long *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			/*
			 * Nickname: look up a previously established session in
			 * the per-socket nfsuid hash and validate its timestamp.
			 */
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			/* Match on uid and (for datagram requests) network address. */
			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
				nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (kauth_cred_getuid(nuidp->nu_cr) == nickuid &&
					(!nd->nd_nam2 ||
					netaddr_match(NU_NETFAM(nuidp),
					&nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#if NFSKERB
			XXX
#endif

			/*
			 * NOTE(review): tvout is only assigned by the (stubbed-out)
			 * NFSKERB decryption above -- with NFSKERB disabled it is
			 * read uninitialized here.  Confirm this path is unreachable
			 * in non-Kerberos builds.
			 */
			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			microtime(&now);
			if (nuidp->nu_expire < now.tv_sec ||
				nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
				(nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
				nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				/* Session expired or timestamp replayed: kill it. */
				nuidp->nu_expire = 0;
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			/* Build the request credential from the cached session cred. */
			bzero(&temp_cred, sizeof(temp_cred));
			ngroups = nuidp->nu_cr->cr_ngroups;
			for (i = 0; i < ngroups; i++)
				temp_cred.cr_groups[i] = nuidp->nu_cr->cr_groups[i];
			if (ngroups > 1)
				nfsrvw_sort(&temp_cred.cr_groups[0], ngroups);

			temp_cred.cr_uid = kauth_cred_getuid(nuidp->nu_cr);
			temp_cred.cr_ngroups = ngroups;
			nd->nd_cr = kauth_cred_create(&temp_cred);
			if (!nd->nd_cr) {
				nd->nd_repstat = ENOMEM;
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nd->nd_flag |= ND_KERBNICK;
		};
	} else {
		/* Unsupported flavor. */
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	/* Reached via the nfsm_* macros when the mbuf data runs out. */
	if (nd->nd_cr)
		kauth_cred_rele(nd->nd_cr);
	return (error);
}
2741
2742 /*
2743 * Search for a sleeping nfsd and wake it up.
2744 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2745 * running nfsds will go look for the work in the nfssvc_sock list.
2746 * Note: Must be called with nfsd_mutex held.
2747 */
2748 void
2749 nfsrv_wakenfsd(struct nfssvc_sock *slp)
2750 {
2751 struct nfsd *nd;
2752
2753 if ((slp->ns_flag & SLP_VALID) == 0)
2754 return;
2755
2756 lck_rw_lock_exclusive(&slp->ns_rwlock);
2757
2758 if (nfsd_waiting) {
2759 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
2760 if (nd->nfsd_flag & NFSD_WAITING) {
2761 nd->nfsd_flag &= ~NFSD_WAITING;
2762 if (nd->nfsd_slp)
2763 panic("nfsd wakeup");
2764 slp->ns_sref++;
2765 nd->nfsd_slp = slp;
2766 lck_rw_done(&slp->ns_rwlock);
2767 wakeup((caddr_t)nd);
2768 return;
2769 }
2770 }
2771 }
2772
2773 slp->ns_flag |= SLP_DOREC;
2774
2775 lck_rw_done(&slp->ns_rwlock);
2776
2777 nfsd_head_flag |= NFSD_CHECKSLP;
2778 }
2779 #endif /* NFS_NOSERVER */
2780
2781 static int
2782 nfs_msg(proc_t p,
2783 const char *server,
2784 const char *msg,
2785 int error)
2786 {
2787 tpr_t tpr;
2788
2789 if (p)
2790 tpr = tprintf_open(p);
2791 else
2792 tpr = NULL;
2793 if (error)
2794 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
2795 error);
2796 else
2797 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2798 tprintf_close(tpr);
2799 return (0);
2800 }
2801
2802 void
2803 nfs_down(nmp, proc, error, flags, msg)
2804 struct nfsmount *nmp;
2805 proc_t proc;
2806 int error, flags;
2807 const char *msg;
2808 {
2809 if (nmp == NULL)
2810 return;
2811 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
2812 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0);
2813 nmp->nm_state |= NFSSTA_TIMEO;
2814 }
2815 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
2816 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 0);
2817 nmp->nm_state |= NFSSTA_LOCKTIMEO;
2818 }
2819 nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
2820 }
2821
2822 void
2823 nfs_up(nmp, proc, flags, msg)
2824 struct nfsmount *nmp;
2825 proc_t proc;
2826 int flags;
2827 const char *msg;
2828 {
2829 if (nmp == NULL)
2830 return;
2831 if (msg)
2832 nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);
2833 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
2834 nmp->nm_state &= ~NFSSTA_TIMEO;
2835 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
2836 }
2837 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
2838 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
2839 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 1);
2840 }
2841 }
2842