/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <machine/spl.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <sys/user.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nqnfs.h>

#include <sys/kdebug.h>

#define FSDBG(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)
#define FSDBG_TOP(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)
#define FSDBG_BOT(A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)

#define	TRUE	1
#define	FALSE	0

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
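/*
 * Worked example of NFS_RTO (an editorial illustration, not part of the
 * original source): the stored estimates are scaled -- nm_srtt holds
 * roughly 8*A and nm_sdrtt roughly 4*D (see the 0.125/0.25 gains applied
 * in nfs_reply()). So for nm_srtt[0] == 32 and nm_sdrtt[0] == 8
 * (A = 4 ticks, D = 2 ticks), a getattr (timer type 1) gets
 *	((((32 + 3) >> 2) + 8 + 1) >> 1) == 8 ticks  (A + 2D),
 * and a read (timer type 3) gets
 *	(((32 + 7) >> 3) + 8 + 1) == 13 ticks  (~ A + 4D, with rounding).
 */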
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
	rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_long nfs_prog, nqnfs_prog;
extern time_t nqnfsstarttime;
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;
extern u_long nfs_xidwrap;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
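/*
 * Numeric sketch of the window arithmetic above (an editorial note; the
 * authoritative updates live in nfs_reply() and nfs_timer()): with
 * NFS_CWNDSCALE == 256 one rpc "slot" is 256 units and NFS_MAXCWND is
 * 32 slots. Each reply that arrives while a full window is outstanding
 * (nm_sent >= nm_cwnd) grows the window by
 *	nm_cwnd += (256 * 256 + nm_cwnd / 2) / nm_cwnd;
 * i.e. about one rpc per round trip, and a retransmit timeout chops
 * nm_cwnd in half, as described above.
 */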
int nfsrtton = 0;
struct nfsrtt nfsrtt;

static int	nfs_msg __P((struct proc *, const char *, const char *, int));
static int	nfs_rcvlock __P((struct nfsreq *));
static void	nfs_rcvunlock __P((struct nfsreq *));
static int	nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
				 struct mbuf **mp));
static int	nfs_reconnect __P((struct nfsreq *rep));
static void	nfs_repbusy(struct nfsreq *rep);
static struct nfsreq *	nfs_repnext(struct nfsreq *rep);
static void	nfs_repdequeue(struct nfsreq *rep);

/* XXX */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_act_t);

#ifndef NFS_NOSERVER
static int	nfsrv_getstream __P((struct nfssvc_sock *, int));

int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
				     struct nfssvc_sock *slp,
				     struct proc *procp,
				     struct mbuf **mreqp)) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
	nfsrv_noop,
	nfsrv_noop
};
#endif /* NFS_NOSERVER */

/*
 * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
 * But some of this code may prove useful someday...
 */
#undef NFSDIAG
#if NFSDIAG
int nfstraceindx = 0;
struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};

#define NFSTRACESUSPENDERS
#ifdef NFSTRACESUSPENDERS
uint nfstracemask = 0xfff00200;
int nfstracexid = -1;
uint onfstracemask = 0;
int nfstracesuspend = -1;
#define NFSTRACE_SUSPEND \
	{ \
	if (nfstracemask) { \
		onfstracemask = nfstracemask; \
		nfstracemask = 0; \
	} \
	}
#define NFSTRACE_RESUME \
	{ \
	nfstracesuspend = -1; \
	if (!nfstracemask) \
		nfstracemask = onfstracemask; \
	}
#define NFSTRACE_STARTSUSPENDCOUNTDOWN \
	{ \
	nfstracesuspend = (nfstraceindx + 100) % NFSTBUFSIZ; \
	}
#define NFSTRACE_SUSPENDING (nfstracesuspend != -1)
#define NFSTRACE_SUSPENSEOVER \
	(nfstracesuspend > 100 ? \
		(nfstraceindx >= nfstracesuspend || \
		 nfstraceindx < nfstracesuspend - 100) : \
		(nfstraceindx >= nfstracesuspend && \
		 nfstraceindx < nfstracesuspend + 8192 - 100))
#else
uint nfstracemask = 0;
#endif /* NFSTRACESUSPENDERS */

int nfsprnttimo = 1;

int nfsodata[1024];
int nfsoprocnum, nfsolen;
int nfsbt[32], nfsbtlen;

#if defined(__ppc__)
int
backtrace(int *where, int size)
{
	register int sp, *fp, numsaved;

	__asm__ volatile("mr %0,r1" : "=r" (sp));

	fp = (int *)*((int *)sp);
	size /= sizeof(int);
	for (numsaved = 0; numsaved < size; numsaved++) {
		*where++ = fp[2];
		if ((int)fp <= 0)
			break;
		fp = (int *)*fp;
	}
	return (numsaved);
}
#elif defined(__i386__)
int
backtrace()
{
	return (0);	/* Till someone implements a real routine */
}
#else
#error architecture not implemented.
#endif

void
nfsdup(struct nfsreq *rep)
{
	int *ip, i, first = 1, end;
	char *s, b[240];
	struct mbuf *mb;

	if ((nfs_debug & NFS_DEBUG_DUP) == 0)
		return;
	/* last mbuf in chain will be nfs content */
	for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next)
		;
	if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen &&
	    !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) {
		s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid,
			rep->r_procnum);
		end = (int)(VTONFS(rep->r_vp)->n_fhp);
		ip = (int *)(end & ~3);
		end += VTONFS(rep->r_vp)->n_fhsize;
		while ((int)ip < end) {
			i = *ip++;
			if (first) {	/* avoid leading zeroes */
				if (i == 0)
					continue;
				first = 0;
				s += sprintf(s, "%x", i);
			} else
				s += sprintf(s, "%08x", i);
		}
		if (first)
			sprintf(s, "%x", 0);
		else	/* eliminate trailing zeroes */
			while (*--s == '0')
				*s = 0;
		/*
		 * set a breakpoint here and you can view the
		 * current backtrace and the one saved in nfsbt
		 */
		kprintf("%s\n", b);
	}
	nfsoprocnum = rep->r_procnum;
	nfsolen = mb->m_len;
	bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len);
	nfsbtlen = backtrace(nfsbt, sizeof(nfsbt));
}
#endif /* NFSDIAG */


/*
 * attempt to bind a socket to a reserved port
 */
static int
nfs_bind_resv(struct nfsmount *nmp)
{
	struct socket *so = nmp->nm_so;
	struct sockaddr_in sin;
	int error;
	u_short tport;

	if (!so)
		return (EINVAL);

	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	tport = IPPORT_RESERVED - 1;
	sin.sin_port = htons(tport);

	while (((error = sobind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
	       (--tport > IPPORT_RESERVED / 2))
		sin.sin_port = htons(tport);
	return (error);
}
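
/*
 * Example of the search above (an editorial note inferred from the
 * constants, not an original comment): with IPPORT_RESERVED == 1024,
 * sobind() is offered ports 1023, 1022, ... down to 513, and the loop
 * stops at the first one not already in use; if every candidate is
 * taken, the final EADDRINUSE is returned to the caller.
 */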

/*
 * variables for managing the nfs_bind_resv_thread
 */
int nfs_resv_mounts = 0;
static int nfs_bind_resv_thread_state = 0;
#define NFS_BIND_RESV_THREAD_STATE_INITTED	1
#define NFS_BIND_RESV_THREAD_STATE_RUNNING	2
static struct slock nfs_bind_resv_slock;
struct nfs_bind_resv_request {
	TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;
	struct nfsmount *brr_nmp;
	int brr_error;
};
static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;

/*
 * thread to handle any reserved port bind requests
 */
static void
nfs_bind_resv_thread(void)
{
	struct nfs_bind_resv_request *brreq;
	boolean_t funnel_state;

	funnel_state = thread_funnel_set(network_flock, TRUE);
	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;

	while (nfs_resv_mounts > 0) {
		simple_lock(&nfs_bind_resv_slock);
		while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
			TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
			simple_unlock(&nfs_bind_resv_slock);
			brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
			wakeup(brreq);
			simple_lock(&nfs_bind_resv_slock);
		}
		simple_unlock(&nfs_bind_resv_slock);
		(void)tsleep((caddr_t)&nfs_bind_resv_request_queue, PSOCK,
			"nfs_bind_resv_request_queue", 0);
	}

	nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
	(void) thread_funnel_set(network_flock, funnel_state);
	(void) thread_terminate(current_act());
}

int
nfs_bind_resv_thread_wake(void)
{
	if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING)
		return (EIO);
	wakeup(&nfs_bind_resv_request_queue);
	return (0);
}

/*
 * underprivileged procs call this to request nfs_bind_resv_thread
 * to perform the reserved port binding for them.
 */
static int
nfs_bind_resv_nopriv(struct nfsmount *nmp)
{
	struct nfs_bind_resv_request brreq;
	int error;

	if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
		if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
			simple_lock_init(&nfs_bind_resv_slock);
			TAILQ_INIT(&nfs_bind_resv_request_queue);
			nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
		}
		kernel_thread(kernel_task, nfs_bind_resv_thread);
		nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
	}

	brreq.brr_nmp = nmp;
	brreq.brr_error = 0;

	simple_lock(&nfs_bind_resv_slock);
	TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
	simple_unlock(&nfs_bind_resv_slock);

	error = nfs_bind_resv_thread_wake();
	if (error) {
		TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
		/* Note: we might be able to simply restart the thread */
		return (error);
	}

	(void) tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);

	return (brreq.brr_error);
}

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 */
int
nfs_connect(nmp, rep)
	struct nfsmount *nmp;
	struct nfsreq *rep;
{
	struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;

	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
		nmp->nm_soproto);
	if (error) {
		goto bad;
	}
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		struct proc *p;
		/*
		 * sobind() requires current_proc() to have superuser privs.
		 * If this bind is part of a reconnect, and the current proc
		 * doesn't have superuser privs, we hand the sobind() off to
		 * a kernel thread to process.
		 */
		if ((nmp->nm_state & NFSSTA_MOUNTED) &&
		    (p = current_proc()) && suser(p->p_ucred, &p->p_acflag)) {
			/* request nfs_bind_resv_thread() to do bind */
			error = nfs_bind_resv_nopriv(nmp);
		} else {
			error = nfs_bind_resv(nmp);
		}
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *));
		if (error) {
			goto bad;
		}

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	/*
	 * Always time out on receive, this allows us to reconnect the
	 * socket to deal with network changes.
	 */
	so->so_rcv.sb_timeo = (2 * hz);
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_snd.sb_timeo = 0;
	}
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");

		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			struct sockopt sopt;
			int val;

			bzero(&sopt, sizeof sopt);
			sopt.sopt_dir = SOPT_SET;
			sopt.sopt_level = SOL_SOCKET;
			sopt.sopt_name = SO_KEEPALIVE;
			sopt.sopt_val = &val;
			sopt.sopt_valsize = sizeof val;
			val = 1;
			sosetopt(so, &sopt);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			struct sockopt sopt;
			int val;

			bzero(&sopt, sizeof sopt);
			sopt.sopt_dir = SOPT_SET;
			sopt.sopt_level = IPPROTO_TCP;
			sopt.sopt_name = TCP_NODELAY;
			sopt.sopt_val = &val;
			sopt.sopt_valsize = sizeof val;
			val = 1;
			sosetopt(so, &sopt);
		}

		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
			(nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
	}

	if (sndreserve > NFS_MAXSOCKBUF)
		sndreserve = NFS_MAXSOCKBUF;
	if (rcvreserve > NFS_MAXSOCKBUF)
		rcvreserve = NFS_MAXSOCKBUF;
	error = soreserve(so, sndreserve, rcvreserve);
	if (error) {
		goto bad;
	}
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
	nmp->nm_timeouts = 0;
	return (0);

bad:
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
static int
nfs_reconnect(rep)
	register struct nfsreq *rep;
{
	register struct nfsreq *rp;
	register struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EIO)
			return (EIO);
		nfs_down(rep, rep->r_nmp, rep->r_procp, "can not connect",
			error, NFSSTA_TIMEO);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			return (error);
		}
		if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
			return (error);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	NFS_DPF(DUP, ("nfs_reconnect RESEND\n"));
	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;

	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		soclose(so);
	}
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct sockaddr *sendnam;
	int error, error2, soflags, flags;
	int xidqueued = 0;
	struct nfsreq *rp;
	char savenametolog[MNAMELEN];

	if (rep) {
		error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
		if (error) {
			m_freem(top);
			return (error);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
		TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
			if (rp == rep)
				break;
		if (rp)
			xidqueued = rp->r_xid;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) ||
	    (nam == 0))
		sendnam = (struct sockaddr *)0;
	else
		sendnam = mtod(nam, struct sockaddr *);

	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

#if NFSDIAG
	if (rep)
		nfsdup(rep);
#endif
	/*
	 * Save the name here in case the mount point goes away when we
	 * switch funnels. The name is on the local stack and is large,
	 * but we don't want to block if we malloc.
	 */
	if (rep)
		strncpy(savenametolog,
			rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
			MNAMELEN);
	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
	error = sosend(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, flags);
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);

	if (error) {
		if (rep) {
			if (xidqueued) {
				TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
					if (rp == rep && rp->r_xid == xidqueued)
						break;
				if (!rp)
					panic("nfs_send: error %d xid %x gone",
						error, xidqueued);
			}
			log(LOG_INFO, "nfs send error %d for server %s\n",
				error, savenametolog);
			/*
			 * Deal with errors for the client side.
			 */
			error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
			if (error2) {
				error = error2;
			} else {
				rep->r_flags |= R_MUSTRESEND;
				NFS_DPF(DUP,
					("nfs_send RESEND error=%d\n", error));
			}
		} else
			log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (???)
		 */
		if (error != EINTR && error != ERESTART && error != EIO &&
		    error != EWOULDBLOCK && error != EPIPE) {
			error = 0;
		}
	}
	return (error);
}

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
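/*
 * Record Mark refresher (an editorial note; see the record marking
 * standard in RFC 1831): on a stream socket each RPC record is preceded
 * by a 4-byte big-endian word whose high bit flags the last fragment
 * and whose low 31 bits give the fragment length. The code below reads
 * that word with MSG_WAITALL, strips the flag bit via
 * "ntohl(len) & ~0x80000000", and then reads exactly len more bytes.
 */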
static int
nfs_receive(rep, aname, mp)
	register struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	register struct socket *so;
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *control;
	u_long len;
	struct sockaddr **getnam;
	struct sockaddr *tmp_nam;
	struct mbuf *mhck;
	struct sockaddr_in *sin;
	int error, error2, sotype, rcvflg;
	struct proc *p = current_proc();	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
			nfs_sndunlock(rep);
			if (error)
				return (error);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			NFS_DPF(DUP,
				("nfs_receive RESEND %s\n",
				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname));
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			/*
			 * we also hold the rcv lock, so rep is still
			 * legit at this point
			 */
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep))) {
					nfs_sndunlock(rep);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
				error = soreceive(so, (struct sockaddr **)0, &auio,
					(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
				if (!rep->r_nmp) /* if unmounted then bailout */
					goto shutout;
				if (error == EWOULDBLOCK && rep) {
					error2 = nfs_sigintr(rep->r_nmp, rep, p);
					if (error2)
						error = error2;
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
					"short receive (%d/%d) from nfs server %s\n",
					sizeof(u_long) - auio.uio_resid,
					sizeof(u_long),
					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
					"impossible packet length",
					len,
					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;

			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct sockaddr **)0,
					&auio, mp, (struct mbuf **)0, &rcvflg);
				if (!rep->r_nmp) /* if unmounted then bailout */ {
					thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
					goto shutout;
				}
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);

			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);

			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
					"short receive (%d/%d) from nfs server %s\n",
					len - auio.uio_resid, len,
					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			auio.uio_procp = p;

			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
			do {
				control = NULL;
				rcvflg = 0;
				error = soreceive(so, (struct sockaddr **)0,
					&auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (!rep->r_nmp) /* if unmounted then bailout */ {
					thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
					goto shutout;
				}
				if (error == EWOULDBLOCK && rep) {
					error2 = nfs_sigintr(rep->r_nmp, rep, p);
					if (error2) {
						thread_funnel_switch(NETWORK_FUNNEL,
							KERNEL_FUNNEL);
						return (error2);
					}
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));

			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);

			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
					"receive error %d from nfs server %s\n",
					error,
					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep);
			if (!error) {
				error = nfs_reconnect(rep);
				if (!error)
					goto tryagain;
				nfs_sndunlock(rep);
			}
		}
	} else {
		/*
		 * We could have failed while rebinding the datagram socket
		 * so we need to attempt to rebind here.
		 */
		if ((so = rep->r_nmp->nm_so) == NULL) {
			error = nfs_sndlock(rep);
			if (!error) {
				error = nfs_reconnect(rep);
				nfs_sndunlock(rep);
			}
			if (error)
				return (error);
			if (!rep->r_nmp) /* if unmounted then bailout */
				return (ENXIO);
			so = rep->r_nmp->nm_so;
		}
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct sockaddr **)0;
		else
			getnam = &tmp_nam;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;

		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
				(struct mbuf **)0, &rcvflg);

			if ((getnam) && (*getnam)) {
				MGET(mhck, M_WAIT, MT_SONAME);
				mhck->m_len = (*getnam)->sa_len;
				sin = mtod(mhck, struct sockaddr_in *);
				bcopy(*getnam, sin, sizeof(struct sockaddr_in));
				mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
				FREE(*getnam, M_SONAME);
				*aname = mhck;
			}
			if (!rep->r_nmp) /* if unmounted then bailout */
				goto dgramout;
			if (error) {
				error2 = nfs_sigintr(rep->r_nmp, rep, p);
				if (error2) {
					error = error2;
					goto dgramout;
				}
			}
			/* Reconnect for all errors. We may be receiving
			 * soft/hard/blocking errors because of a network
			 * change.
			 * XXX: we should rate limit or delay this
			 * to once every N attempts or something.
			 * although TCP doesn't seem to.
			 */
			if (error) {
				thread_funnel_switch(NETWORK_FUNNEL,
					KERNEL_FUNNEL);
				error2 = nfs_sndlock(rep);
				if (!error2) {
					error2 = nfs_reconnect(rep);
					if (error2)
						error = error2;
					else if (!rep->r_nmp) /* if unmounted then bailout */
						error = ENXIO;
					else
						so = rep->r_nmp->nm_so;
					nfs_sndunlock(rep);
				} else {
					error = error2;
				}
				thread_funnel_switch(KERNEL_FUNNEL,
					NETWORK_FUNNEL);
			}
		} while (error == EWOULDBLOCK);

dgramout:
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
		len -= auio.uio_resid;
	}
shutout:
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
int
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	register struct nfsreq *rep;
	register struct nfsmount *nmp = myrep->r_nmp;
	register long t1;
	struct mbuf *mrep, *md;
	struct mbuf *nam;
	u_long rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 * If nfs_rcvlock() returns EALREADY, that means that
		 * the reply has already been received by another
		 * process and we can return immediately. In this
		 * case, the lock is not taken to avoid races with
		 * other processes.
		 */
		error = nfs_rcvlock(myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);

		/*
		 * If we slept after putting bits otw, then reply may have
		 * arrived. In which case returning is required, or we
		 * would hang trying to nfs_receive an already received reply.
		 */
		if (myrep->r_mrep != NULL) {
			nfs_rcvunlock(myrep);
			FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
		 * is still intact by checks done in nfs_rcvlock.
		 */
		/* XXX why do we ask for nam here? we don't use it! */
		error = nfs_receive(myrep, &nam, &mrep);
		if (nam)
			m_freem(nam);
		/*
		 * Bailout asap if nfsmount struct gone (unmounted).
		 */
		if (!myrep->r_nmp) {
			FSDBG(530, myrep->r_xid, myrep, nmp, -2);
			return (ENXIO);
		}
		if (error) {
			FSDBG(530, myrep->r_xid, myrep, nmp, error);
			nfs_rcvunlock(myrep);

			/* Bailout asap if nfsmount struct gone (unmounted). */
			if (!myrep->r_nmp)
				return (ENXIO);

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				if (nmp->nm_so)
					nmp->nm_so->so_error = 0;
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}

		/*
		 * We assume all is fine, but if we did not have an error
		 * and mrep is 0, better not dereference it. nfs_receive
		 * calls soreceive, which carefully sets error=0 when it got
		 * errors on sbwait (tsleep). In most cases, I assume that's
		 * so we could go back again. In the tcp case, EPIPE is
		 * returned. In the udp case, nfs_receive gets back here with
		 * no error and no mrep. Is the right fix to have soreceive
		 * check for a process aborted after sbwait and return
		 * something non-zero? Should nfs_receive give an EPIPE? Too
		 * risky to play with those two this late in the game for a
		 * shutdown problem. Instead, just check here and get out. (ekn)
		 */
		if (!mrep) {
			nfs_rcvunlock(myrep);
			FSDBG(530, myrep->r_xid, myrep, nmp, -3);
			return (ENXIO);	/* sounds good */
		}

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
#ifndef NFS_NOSERVER
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else {
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
#else
			nfsstats.rpcinvalid++;
			m_freem(mrep);
#endif
nfsmout:
			if (nmp->nm_state & NFSSTA_RCVLOCK)
				nfs_rcvunlock(myrep);
			if (myrep->r_flags & R_GETONEREP)
				return (0); /* this path used by NQNFS */
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				/*
				 * If we're tracking the round trip time
				 * then we update the circular log here
				 * with the stats from our current request.
				 */
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					if (proct[rep->r_procnum] == 0)
						panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					microtime(&rt->tstamp); // XXX unused
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
					nmp->nm_cwnd);
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
						(NFS_CWNDSCALE * NFS_CWNDSCALE +
						 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_SENT;
					nmp->nm_sent -= NFS_CWNDSCALE;
				}
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					if (proct[rep->r_procnum] == 0)
						panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		nfs_rcvunlock(myrep);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfs_reply: nil r_mrep");
			return (0);
		}
		FSDBG(530, myrep->r_xid, myrep, rep,
			rep ? rep->r_xid : myrep->r_flags);
		if (myrep->r_flags & R_GETONEREP)
			return (0); /* this path used by NQNFS */
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
	u_int64_t *xidp;
{
	register struct mbuf *m, *mrep, *m2;
	register struct nfsreq *rep, *rp;
	register u_long *tl;
	register int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	struct nfsnode *np;
	char nickv[RPCX_NICKVERF];
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	int verf_len, verf_type;
	u_long xid;
	u_quad_t frev;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	int nmsotype;
	struct timeval now;

	if (mrp)
		*mrp = NULL;
	if (xidp)
		*xidp = 0;

	MALLOC_ZONE(rep, struct nfsreq *,
		sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);

	nmp = VFSTONFS(vp->v_mount);
	if (nmp == NULL ||
	    (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
		return (ENXIO);
	}
	nmsotype = nmp->nm_sotype;

	FSDBG_TOP(531, vp, procnum, nmp, rep);

	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	microuptime(&now);
	rep->r_lastmsg = now.tv_sec -
		((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	nmp = VFSTONFS(vp->v_mount);
	if (!nmp) {
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
		return (ENXIO);
	}
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		bzero((caddr_t)key, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			nmp = VFSTONFS(vp->v_mount);
			if (!nmp) {
				FSDBG_BOT(531, 2, vp, error, rep);
				FREE_ZONE((caddr_t)rep,
					sizeof (struct nfsreq), M_NFSREQ);
				m_freem(mrest);
				return (ENXIO);
			}
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			nmp = VFSTONFS(vp->v_mount);
			if (!error && !nmp)
				error = ENXIO;
			if (error) {
				FSDBG_BOT(531, 2, vp, error, rep);
				FREE_ZONE((caddr_t)rep,
					sizeof (struct nfsreq), M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (xidp)
		*xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);
	if (auth_str)
		_FREE(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmsotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_long *) = htonl(0x80000000 |
			(m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	nmp = VFSTONFS(vp->v_mount);
	if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	microtime(&now);
	reqtime = now.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
				  (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
				  nmp->nm_sent < nmp->nm_cwnd)) {
		int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);

		splx(s);
		if (connrequired)
			error = nfs_sndlock(rep);

		/*
		 * Set the R_SENT before doing the send in case another thread
		 * processes the reply before the nfs_send returns here
		 */
		if (!error) {
			if ((rep->r_flags & R_MUSTRESEND) == 0) {
				FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
					nmp->nm_cwnd);
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}

			m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
			if (connrequired)
				nfs_sndunlock(rep);
		}
		nmp = VFSTONFS(vp->v_mount);
		if (error) {
			if (nmp)
				nmp->nm_sent -= NFS_CWNDSCALE;
			rep->r_flags &= ~R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	nfs_repdequeue(rep);

	nmp = VFSTONFS(vp->v_mount);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		if (nmp) {
			FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
			nmp->nm_sent -= NFS_CWNDSCALE;
		}
	}

	/*
	 * If there was a successful reply and a tprintf msg,
	 * tprintf a response.
	 */
	if (!error)
		nfs_up(rep, nmp, procp, "is alive again", NFSSTA_TIMEO);
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (!error && !nmp)
		error = ENXIO;
	if (error) {
		m_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
			    error == NFSERR_TRYLATER) {
				m_freem(mrep);
				error = 0;
				microuptime(&now);
				waituntil = now.tv_sec + trylater_delay;
				NFS_DPF(DUP,
					("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
					 nmp->nm_mountp->mnt_stat.f_mntfromname,
					 nmp->nm_flag, trylater_cnt, waituntil,
					 trylater_delay));
				while (now.tv_sec < waituntil) {
					(void)tsleep((caddr_t)&lbolt,
						PSOCK, "nqnfstry", 0);
					microuptime(&now);
				}
				trylater_delay *= 2;
				if (trylater_delay > 60)
					trylater_delay = 60;
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else {
				m_freem(mrep);
				error &= ~NFSERR_RETERR;
			}
			m_freem(rep->r_mreq);
			FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
			FREE_ZONE((caddr_t)rep,
				sizeof (struct nfsreq), M_NFSREQ);
			return (error);
		}

		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			if (*tl) {
				np = VTONFS(vp);
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				microtime(&now);
				if (reqtime > now.tv_sec) {
					fxdr_hyper(tl, &frev);
					nqnfs_clientlease(nmp, np, nqlflag,
						cachable, reqtime, frev);
				}
			}
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
		FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	m_freem(rep->r_mreq);
	FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
	FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
	return (error);
}
1742
1743#ifndef NFS_NOSERVER
1744/*
1745 * Generate the rpc reply header
1746 * siz arg. is used to decide if adding a cluster is worthwhile
1747 */
1748int
1749nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1750 int siz;
1751 struct nfsrv_descript *nd;
1752 struct nfssvc_sock *slp;
1753 int err;
1754 int cache;
1755 u_quad_t *frev;
1756 struct mbuf **mrq;
1757 struct mbuf **mbp;
1758 caddr_t *bposp;
1759{
1760 register u_long *tl;
1761 register struct mbuf *mreq;
1762 caddr_t bpos;
1763 struct mbuf *mb, *mb2;
1764
1765 MGETHDR(mreq, M_WAIT, MT_DATA);
1766 mb = mreq;
1767 /*
1768 * If this is a big reply, use a cluster else
1769 * try and leave leading space for the lower level headers.
1770 */
1771 siz += RPC_REPLYSIZ;
1772 if (siz >= MINCLSIZE) {
1773 MCLGET(mreq, M_WAIT);
1774 } else
1775 mreq->m_data += max_hdr;
1776 tl = mtod(mreq, u_long *);
1777 mreq->m_len = 6 * NFSX_UNSIGNED;
1778 bpos = ((caddr_t)tl) + mreq->m_len;
1779 *tl++ = txdr_unsigned(nd->nd_retxid);
1780 *tl++ = rpc_reply;
1781 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1782 *tl++ = rpc_msgdenied;
1783 if (err & NFSERR_AUTHERR) {
1784 *tl++ = rpc_autherr;
1785 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1786 mreq->m_len -= NFSX_UNSIGNED;
1787 bpos -= NFSX_UNSIGNED;
1788 } else {
1789 *tl++ = rpc_mismatch;
1790 *tl++ = txdr_unsigned(RPC_VER2);
1791 *tl = txdr_unsigned(RPC_VER2);
1792 }
1793 } else {
1794 *tl++ = rpc_msgaccepted;
1795
1796 /*
1797 * For Kerberos authentication, we must send the nickname
1798 * verifier back, otherwise just RPCAUTH_NULL.
1799 */
1800 if (nd->nd_flag & ND_KERBFULL) {
1801 register struct nfsuid *nuidp;
1802 struct timeval ktvin, ktvout;
1803
1804 for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1805 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1806 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1807 (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1808 &nuidp->nu_haddr, nd->nd_nam2)))
1809 break;
1810 }
1811 if (nuidp) {
1812 ktvin.tv_sec =
1813 txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1814 ktvin.tv_usec =
1815 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1816
1817 /*
1818 * Encrypt the timestamp in ecb mode using the
1819 * session key.
1820 */
1821#if NFSKERB
1822 XXX
1823#endif
1824
1825 *tl++ = rpc_auth_kerb;
1826 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1827 *tl = ktvout.tv_sec;
1828 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1829 *tl++ = ktvout.tv_usec;
1830 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1831 } else {
1832 *tl++ = 0;
1833 *tl++ = 0;
1834 }
1835 } else {
1836 *tl++ = 0;
1837 *tl++ = 0;
1838 }
1839 switch (err) {
1840 case EPROGUNAVAIL:
1841 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1842 break;
1843 case EPROGMISMATCH:
1844 *tl = txdr_unsigned(RPC_PROGMISMATCH);
1845 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1846 if (nd->nd_flag & ND_NQNFS) {
1847 *tl++ = txdr_unsigned(3);
1848 *tl = txdr_unsigned(3);
1849 } else {
1850 *tl++ = txdr_unsigned(2);
1851 *tl = txdr_unsigned(3);
1852 }
1853 break;
1854 case EPROCUNAVAIL:
1855 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1856 break;
1857 case EBADRPC:
1858 *tl = txdr_unsigned(RPC_GARBAGE);
1859 break;
1860 default:
1861 *tl = 0;
1862 if (err != NFSERR_RETVOID) {
1863 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1864 if (err)
1865 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1866 else
1867 *tl = 0;
1868 }
1869 break;
1870 };
1871 }
1872
1873 /*
1874 * For nqnfs, piggyback lease as requested.
1875 */
1876 if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1877 if (nd->nd_flag & ND_LEASE) {
1878 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1879 *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1880 *tl++ = txdr_unsigned(cache);
1881 *tl++ = txdr_unsigned(nd->nd_duration);
1882 txdr_hyper(frev, tl);
1883 } else {
1884 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1885 *tl = 0;
1886 }
1887 }
1888 if (mrq != NULL)
1889 *mrq = mreq;
1890 *mbp = mb;
1891 *bposp = bpos;
1892 if (err != 0 && err != NFSERR_RETVOID)
1893 nfsstats.srvrpc_errs++;
1894 return (0);
1895}
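/*
 * For reference: the accepted reply assembled above follows the ONC RPC
 * layout (RFC 1057), so the fixed 6 * NFSX_UNSIGNED reservation covers
 * (a sketch; the error cases above emit slightly different words):
 *
 *	xid          - copied from the request (nd_retxid)
 *	mtype        - REPLY (rpc_reply)
 *	reply_stat   - MSG_ACCEPTED or MSG_DENIED
 *	verf flavor  - AUTH_NULL (0) or rpc_auth_kerb
 *	verf length  - 0, or 3 * NFSX_UNSIGNED for the kerb nickname verifier
 *	accept_stat  - SUCCESS, PROG_UNAVAIL, PROG_MISMATCH, ...
 */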
1896
1897
1898#endif /* NFS_NOSERVER */
1899
1900
1901/*
1902 * From FreeBSD 1.58, a Matt Dillon fix...
1903 * Flag a request as being about to terminate.
1904 * The nm_sent count is decremented now to avoid deadlocks when the process
1905 * in soreceive() hasn't yet managed to send its own request.
1906 */
1907static void
1908nfs_softterm(struct nfsreq *rep)
1909{
1910
1911 rep->r_flags |= R_SOFTTERM;
1912 if (rep->r_flags & R_SENT) {
1913 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1914 rep->r_nmp->nm_cwnd);
1915 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1916 rep->r_flags &= ~R_SENT;
1917 }
1918}
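/*
 * nm_sent and nm_cwnd are kept in units of NFS_CWNDSCALE per outstanding
 * request (a congestion window in the style of Van Jacobson's TCP work;
 * nfs_timer below halves nm_cwnd when it has to retransmit).  Dropping
 * R_SENT here keeps a dying request from being charged against the
 * window forever.
 */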
1919
1920void
1921nfs_timer_funnel(arg)
1922 void * arg;
1923{
1924 (void) thread_funnel_set(kernel_flock, TRUE);
1925 nfs_timer(arg);
1926 (void) thread_funnel_set(kernel_flock, FALSE);
1927
1928}
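/*
 * nfs_timer is driven by a timeout() callout, which is not guaranteed
 * to hold the kernel funnel; this wrapper takes kernel_flock for the
 * duration so the timer can walk the request queue safely.
 */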
1929
1930/*
1931 * Ensure rep isn't in use by the timer, then dequeue it.
1932 */
1933void
1934nfs_repdequeue(struct nfsreq *rep)
1935{
1936 int s;
1937
1938 while ((rep->r_flags & R_BUSY)) {
1939 rep->r_flags |= R_WAITING;
1940 tsleep(rep, PSOCK, "repdeq", 0);
1941 }
1942 s = splsoftclock();
1943 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1944 splx(s);
1945}
1946
1947/*
1948 * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
1949 * free()'d out from under it.
1950 */
1951void
1952nfs_repbusy(struct nfsreq *rep)
1953{
1954
1955 if ((rep->r_flags & R_BUSY))
1956 panic("rep locked");
1957 rep->r_flags |= R_BUSY;
1958}
1959
1960/*
1961 * Unbusy the nfsreq passed in; return the next nfsreq in the chain, busied.
1962 */
1963struct nfsreq *
1964nfs_repnext(struct nfsreq *rep)
1965{
1966 struct nfsreq * nextrep;
1967
1968 if (rep == NULL)
1969 return (NULL);
1970 /*
1971 * We need to get and busy the next req before signalling the
1972 * current one, otherwise wakeup() may block us and we'll race to
1973 * grab the next req.
1974 */
1975 nextrep = TAILQ_NEXT(rep, r_chain);
1976 if (nextrep != NULL)
1977 nfs_repbusy(nextrep);
1978 /* unbusy and signal. */
1979 rep->r_flags &= ~R_BUSY;
1980 if ((rep->r_flags & R_WAITING)) {
1981 rep->r_flags &= ~R_WAITING;
1982 wakeup(rep);
1983 }
1984 return (nextrep);
1985}
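/*
 * Typical traversal (as used by nfs_timer below) -- the next entry is
 * always busied before the current one is released:
 *
 *	rep = TAILQ_FIRST(&nfs_reqq);
 *	if (rep != NULL)
 *		nfs_repbusy(rep);
 *	for ( ; rep != NULL ; rep = nfs_repnext(rep))
 *		...
 */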
1986
1987/*
1988 * Nfs timer routine.
1989 * Scan the nfsreq list and retransmit any requests that have timed out.
1990 * To avoid retransmission attempts on STREAM sockets (in the future) make
1991 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1992 */
1993void
1994nfs_timer(arg)
1995 void *arg; /* never used */
1996{
1997 register struct nfsreq *rep;
1998 register struct mbuf *m;
1999 register struct socket *so;
2000 register struct nfsmount *nmp;
2001 register int timeo;
2002 int s, error;
2003#ifndef NFS_NOSERVER
2004 static long lasttime = 0;
2005 register struct nfssvc_sock *slp;
2006 u_quad_t cur_usec;
2007#endif /* NFS_NOSERVER */
2008#if NFSDIAG
2009 int rttdiag;
2010#endif
2011 int flags, rexmit, cwnd, sent;
2012 u_long xid;
2013 struct timeval now;
2014
2015 s = splnet();
2016 /*
2017 * XXX If preemptable threads are implemented the spls used for the
2018 * outstanding request queue must be replaced with mutexes.
2019 */
2020#ifdef NFSTRACESUSPENDERS
2021 if (NFSTRACE_SUSPENDING) {
2022 TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
2023 if (rep->r_xid == nfstracexid)
2024 break;
2025 if (!rep) {
2026 NFSTRACE_RESUME;
2027 } else if (NFSTRACE_SUSPENSEOVER) {
2028 NFSTRACE_SUSPEND;
2029 }
2030 }
2031#endif
2032 rep = TAILQ_FIRST(&nfs_reqq);
2033 if (rep != NULL)
2034 nfs_repbusy(rep);
2035 microuptime(&now);
2036 for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
2037#ifdef NFSTRACESUSPENDERS
2038 if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
2039 nfstracexid = rep->r_xid;
2040 NFSTRACE_STARTSUSPENDCOUNTDOWN;
2041 }
2042#endif
2043 nmp = rep->r_nmp;
2044 if (!nmp) /* unmounted */
2045 continue;
2046 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
2047 continue;
2048 if (nfs_sigintr(nmp, rep, rep->r_procp))
2049 continue;
2050 if (nmp->nm_tprintf_initial_delay != 0 &&
2051 (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
2052 rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
2053 rep->r_lastmsg = now.tv_sec;
2054 nfs_down(rep, rep->r_nmp, rep->r_procp, "not responding",
2055 0, NFSSTA_TIMEO);
2056 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2057 /* we're not yet completely mounted and */
2058 /* we can't complete an RPC, so we fail */
2059 nfsstats.rpctimeouts++;
2060 nfs_softterm(rep);
2061 continue;
2062 }
2063 }
2064 if (rep->r_rtt >= 0) {
2065 rep->r_rtt++;
2066 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2067 timeo = nmp->nm_timeo;
2068 else
2069 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
2070 /* ensure 62.5 ms floor */
2071 while (16 * timeo < hz)
2072 timeo *= 2;
2073 if (nmp->nm_timeouts > 0)
2074 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2075 if (rep->r_rtt <= timeo)
2076 continue;
2077 if (nmp->nm_timeouts < 8)
2078 nmp->nm_timeouts++;
2079 }
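/*
 * A worked example of the floor above, assuming hz = 100 and an
 * estimated timeo of 3 ticks: the loop doubles 3 -> 6 -> 12 so that
 * 16 * timeo >= hz, i.e. no retransmit timeout below hz/16 = 62.5 ms.
 * nfs_backoff[] then scales that geometrically for each of up to 8
 * consecutive timeouts.
 */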
2080 /*
2081 * Check for too many retransmits. This is never true for
2082 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
2083 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
2084 */
2085 if (rep->r_rexmit >= rep->r_retry) { /* too many */
2086 nfsstats.rpctimeouts++;
2087 nfs_softterm(rep);
2088 continue;
2089 }
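/*
 * For connection-oriented (non-SOCK_DGRAM) mounts we never resend from
 * the timer -- TCP does its own retransmission -- but r_rexmit is still
 * advanced so the too-many-retransmits check above can eventually fire.
 */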
2090 if (nmp->nm_sotype != SOCK_DGRAM) {
2091 if (++rep->r_rexmit > NFS_MAXREXMIT)
2092 rep->r_rexmit = NFS_MAXREXMIT;
2093 continue;
2094 }
2095 if ((so = nmp->nm_so) == NULL)
2096 continue;
2097
2098 /*
2099 * If there is enough space and the window allows,
2100 * resend it.
2101 * Set r_rtt to -1 in case we fail to send it now.
2102 */
2103#if NFSDIAG
2104 rttdiag = rep->r_rtt;
2105#endif
2106 rep->r_rtt = -1;
2107 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
2108 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
2109 (rep->r_flags & R_SENT) ||
2110 nmp->nm_sent < nmp->nm_cwnd) &&
2111 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
2112
2113 struct proc *p = current_proc();
2114
2115#if NFSDIAG
2116 if (rep->r_flags & R_SENT && nfsprnttimo &&
2117 nmp->nm_timeouts >= nfsprnttimo) {
2118 int t = proct[rep->r_procnum];
2119 if (t)
2120 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1]));
2121 else
2122 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum));
2123 }
2124 nfsdup(rep);
2125#endif /* NFSDIAG */
2126 /*
2127 * Iff first send, start timing
2128 * else turn timing off, backoff timer
2129 * and divide congestion window by 2.
2130 * We update these *before* the send to avoid
2131 * racing against receiving the reply.
2132 * We save them so we can restore them on send error.
2133 */
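/*
 * (The multiplicative decrease below is bounded by NFS_CWNDSCALE, i.e.
 * one outstanding request; the matching additive increase happens in
 * the reply path as responses arrive.)
 */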
2134 flags = rep->r_flags;
2135 rexmit = rep->r_rexmit;
2136 cwnd = nmp->nm_cwnd;
2137 sent = nmp->nm_sent;
2138 xid = rep->r_xid;
2139 if (rep->r_flags & R_SENT) {
2140 rep->r_flags &= ~R_TIMING;
2141 if (++rep->r_rexmit > NFS_MAXREXMIT)
2142 rep->r_rexmit = NFS_MAXREXMIT;
2143 nmp->nm_cwnd >>= 1;
2144 if (nmp->nm_cwnd < NFS_CWNDSCALE)
2145 nmp->nm_cwnd = NFS_CWNDSCALE;
2146 nfsstats.rpcretries++;
2147 } else {
2148 rep->r_flags |= R_SENT;
2149 nmp->nm_sent += NFS_CWNDSCALE;
2150 }
2151 FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
2152
2153 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2154
2155 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
2156 error = (*so->so_proto->pr_usrreqs->pru_send)
2157 (so, 0, m, 0, 0, p);
2158 else
2159 error = (*so->so_proto->pr_usrreqs->pru_send)
2160 (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p);
2161
2162 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2163
2164 FSDBG(535, xid, error, sent, cwnd);
2165
2166 if (error) {
2167 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
2168 so->so_error = 0;
2169 rep->r_flags = flags | R_RESENDERR;
2170 rep->r_rexmit = rexmit;
2171 nmp->nm_cwnd = cwnd;
2172 nmp->nm_sent = sent;
2173 if (flags & R_SENT)
2174 nfsstats.rpcretries--;
2175 } else
2176 rep->r_rtt = 0;
2177 }
2178 }
2179 microuptime(&now);
2180#ifndef NFS_NOSERVER
2181 /*
2182 * Call the nqnfs server timer once a second to handle leases.
2183 */
2184 if (lasttime != now.tv_sec) {
2185 lasttime = now.tv_sec;
2186 nqnfs_serverd();
2187 }
2188
2189 /*
2190 * Scan the write gathering queues for writes that need to be
2191 * completed now.
2192 */
2193 cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
2194 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
2195 if (LIST_FIRST(&slp->ns_tq) &&
2196 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
2197 nfsrv_wakenfsd(slp);
2198 }
2199#endif /* NFS_NOSERVER */
2200 splx(s);
2201
2202 if (nfsbuffreeuptimestamp + 30 <= now.tv_sec) {
2203 /*
2204 * We haven't called nfs_buf_freeup() in a little while.
2205 * So, see if we can free up any stale/unused bufs now.
2206 */
2207 nfs_buf_freeup(1);
2208 }
2209
2210 timeout(nfs_timer_funnel, (void *)0, nfs_ticks);
2211
2212}
2213
2214
2215/*
2216 * Test for a termination condition pending on the process.
2217 * This is used to determine if we need to bail on a mount.
2218 * EIO is returned if there has been a soft timeout.
2219 * EINTR is returned if there is a signal pending that is not being ignored
2220 * and the mount is interruptible, or if we are a thread in the process
2221 * of cancellation (also SIGKILL posted).
2222 */
2223int
2224nfs_sigintr(nmp, rep, p)
2225 struct nfsmount *nmp;
2226 struct nfsreq *rep;
2227 struct proc *p;
2228{
2229 struct uthread *curr_td;
2230 sigset_t pending_sigs;
2231 int context_good = 0;
2232 struct nfsmount *repnmp;
2233
2234 if (nmp == NULL)
2235 return (ENXIO);
2236 if (rep != NULL) {
2237 repnmp = rep->r_nmp;
2238 /* we've had a forced unmount. */
2239 if (repnmp == NULL)
2240 return (ENXIO);
2241 /* request has timed out on a 'soft' mount. */
2242 if (rep->r_flags & R_SOFTTERM)
2243 return (EIO);
2244 /*
2245 * If we're in the midst of a forced unmount and there's
2246 * been a timeout, we're dead and fail I/O.
2247 */
2248 if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
2249 (NFSSTA_FORCE|NFSSTA_TIMEO))
2250 return (EIO);
2251 /* Someone is unmounting us, go soft and mark it. */
2252 if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
2253 repnmp->nm_flag |= NFSMNT_SOFT;
2254 nmp->nm_state |= NFSSTA_FORCE;
2255 }
2256 /*
2257 * If the mount is hung and we've requested not to hang
2258 * on remote filesystems, then bail now.
2259 */
2260 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
2261 (repnmp->nm_state & NFSSTA_TIMEO) != 0)
2262 return (EIO);
2263 }
2264 /* XXX: is this valid? this probably should be an assertion. */
2265 if (p == NULL)
2266 return (0);
2267
2268 /*
2269 * XXX: Since nfs doesn't have a good shot at getting the current
2270 * thread we take a guess. (only struct proc * are passed to VOPs)
2271 * What we do is look at the current thread; if it belongs to the
2272 * passed-in proc pointer then we have a "good/accurate" context
2273 * and can make an accurate guess as to what to do.
2274 * However, if we have a bad context we have to make do with what
2275 * is in the proc struct, which may not be as up to date as we'd
2276 * like.
2277 * This is ok because the process will call us with the correct
2278 * context after a short timeout while waiting for a response.
2279 */
2280 curr_td = (struct uthread *)get_bsdthread_info(current_act());
2281 if (curr_td->uu_proc == p)
2282 context_good = 1;
2283 if (context_good && current_thread_aborted())
2284 return (EINTR);
2285 /* mask off thread and process blocked signals. */
2286 if (context_good)
2287 pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
2288 else
2289 pending_sigs = p->p_siglist;
2290 /* mask off process level and NFS ignored signals. */
2291 pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
2292 if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
2293 return (EINTR);
2294 return (0);
2295}
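/*
 * NFSINT_SIGMASK is historically the set SIGINT, SIGTERM, SIGKILL,
 * SIGHUP and SIGQUIT, so on an NFSMNT_INT mount only those signals
 * will interrupt a pending request; everything else keeps waiting.
 */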
2296
2297/*
2298 * Lock a socket against others.
2299 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
2300 * and also to avoid race conditions between the processes with nfs requests
2301 * in progress when a reconnect is necessary.
2302 */
2303int
55e303ae 2304nfs_sndlock(rep)
1c79356b
A
2305 struct nfsreq *rep;
2306{
2307 register int *statep;
2308 struct proc *p;
2309 int error, slpflag = 0, slptimeo = 0;
2310
2311 if (rep->r_nmp == NULL)
2312 return (ENXIO);
2313 statep = &rep->r_nmp->nm_state;
2314
2315 p = rep->r_procp;
2316 if (rep->r_nmp->nm_flag & NFSMNT_INT)
2317 slpflag = PCATCH;
2318 while (*statep & NFSSTA_SNDLOCK) {
2319 error = nfs_sigintr(rep->r_nmp, rep, p);
2320 if (error)
2321 return (error);
2322 *statep |= NFSSTA_WANTSND;
2323 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
2324 slptimeo = hz;
2325 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2326 "nfsndlck", slptimeo);
2327 if (slpflag == PCATCH) {
2328 slpflag = 0;
2329 slptimeo = 2 * hz;
2330 }
2331 /*
2332 * Make sure while we slept that the mountpoint didn't go away.
2333 * nfs_sigintr and callers expect it intact.
2334 */
2335 if (!rep->r_nmp)
2336 return (ENXIO); /* don't have lock until out of loop */
2337 }
2338 *statep |= NFSSTA_SNDLOCK;
2339 return (0);
2340}
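/*
 * Usage sketch (the send path brackets the actual transmission):
 *
 *	if ((error = nfs_sndlock(rep)) == 0) {
 *		error = nfs_send(so, nam, top, rep);
 *		nfs_sndunlock(rep);
 *	}
 *
 * so at most one request at a time writes to a stream socket or
 * attempts a reconnect.
 */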
2341
2342/*
2343 * Unlock the stream socket for others.
2344 */
2345void
2346nfs_sndunlock(rep)
2347 struct nfsreq *rep;
2348{
2349 register int *statep;
2350
2351 if (rep->r_nmp == NULL)
2352 return;
2353 statep = &rep->r_nmp->nm_state;
2354 if ((*statep & NFSSTA_SNDLOCK) == 0)
2355 panic("nfs sndunlock");
2356 *statep &= ~NFSSTA_SNDLOCK;
2357 if (*statep & NFSSTA_WANTSND) {
2358 *statep &= ~NFSSTA_WANTSND;
2359 wakeup((caddr_t)statep);
2360 }
2361}
2362
2363static int
2364nfs_rcvlock(rep)
2365 register struct nfsreq *rep;
2366{
2367 register int *statep;
2368 int error, slpflag, slptimeo = 0;
2369
2370 /* make sure we still have our mountpoint */
2371 if (!rep->r_nmp) {
2372 if (rep->r_mrep != NULL)
2373 return (EALREADY);
2374 return (ENXIO);
2375 }
2376
2377 statep = &rep->r_nmp->nm_state;
2378 FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
2379 if (rep->r_nmp->nm_flag & NFSMNT_INT)
2380 slpflag = PCATCH;
2381 else
2382 slpflag = 0;
2383 while (*statep & NFSSTA_RCVLOCK) {
2384 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
2385 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
2386 return (error);
2387 } else if (rep->r_mrep != NULL) {
2388 /*
2389 * Don't bother sleeping if reply already arrived
2390 */
2391 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
2392 return (EALREADY);
2393 }
2394 FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
2395 *statep |= NFSSTA_WANTRCV;
2396 /*
2397 * We need to poll if we're P_NOREMOTEHANG so that we
2398 * call nfs_sigintr periodically above.
2399 */
2400 if (rep->r_procp != NULL &&
2401 (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
2402 slptimeo = hz;
2403 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2404 "nfsrcvlk", slptimeo);
2405 if (slpflag == PCATCH) {
2406 slpflag = 0;
2407 slptimeo = 2 * hz;
2408 }
2409 /*
2410 * Make sure while we slept that the mountpoint didn't go away.
2411 * nfs_sigintr and caller nfs_reply expect it intact.
2412 */
2413 if (!rep->r_nmp) {
2414 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
2415 return (ENXIO); /* don't have lock until out of loop */
2416 }
2417 }
2418 /*
2419 * nfs_reply will handle it if reply already arrived.
2420 * (We may have slept or been preempted while on network funnel).
2421 */
2422 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
2423 *statep |= NFSSTA_RCVLOCK;
2424 return (0);
2425}
2426
2427/*
2428 * Unlock the stream socket for others.
2429 */
2430static void
2431nfs_rcvunlock(rep)
2432 register struct nfsreq *rep;
2433{
2434 register int *statep;
2435
2436 if (rep->r_nmp == NULL)
2437 return;
2438 statep = &rep->r_nmp->nm_state;
2439
2440 FSDBG(533, statep, *statep, 0, 0);
2441 if ((*statep & NFSSTA_RCVLOCK) == 0)
2442 panic("nfs rcvunlock");
2443 *statep &= ~NFSSTA_RCVLOCK;
2444 if (*statep & NFSSTA_WANTRCV) {
2445 *statep &= ~NFSSTA_WANTRCV;
2446 wakeup((caddr_t)statep);
2447 }
2448}
2449
2450
2451#ifndef NFS_NOSERVER
2452/*
2453 * Socket upcall routine for the nfsd sockets.
2454 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2455 * Essentially do as much as possible non-blocking, else punt and it will
2456 * be called with M_WAIT from an nfsd.
2457 */
2458 /*
2459 * Needs to run under network funnel
2460 */
2461void
2462nfsrv_rcv(so, arg, waitflag)
2463 struct socket *so;
2464 caddr_t arg;
2465 int waitflag;
2466{
2467 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2468 register struct mbuf *m;
2469 struct mbuf *mp, *mhck;
2470 struct sockaddr *nam;
1c79356b 2471 struct uio auio;
2472 int flags, ns_nflag=0, error;
2473 struct sockaddr_in *sin;
2474
2475 if ((slp->ns_flag & SLP_VALID) == 0)
2476 return;
2477#ifdef notdef
2478 /*
2479 * Define this to test for nfsds handling this under heavy load.
2480 */
2481 if (waitflag == M_DONTWAIT) {
2482 ns_nflag = SLPN_NEEDQ;
2483 goto dorecs;
2484 }
2485#endif
2486 auio.uio_procp = NULL;
2487 if (so->so_type == SOCK_STREAM) {
2488 /*
2489 * If there are already records on the queue, defer soreceive()
2490 * to an nfsd so that there is feedback to the TCP layer that
2491 * the nfs servers are heavily loaded.
2492 */
2493 if (slp->ns_rec && waitflag == M_DONTWAIT) {
2494 ns_nflag = SLPN_NEEDQ;
2495 goto dorecs;
2496 }
2497
2498 /*
2499 * Do soreceive().
2500 */
2501 auio.uio_resid = 1000000000;
2502 flags = MSG_DONTWAIT;
2503 error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
2504 if (error || mp == (struct mbuf *)0) {
2505 if (error == EWOULDBLOCK)
2506 ns_nflag = SLPN_NEEDQ;
2507 else
2508 ns_nflag = SLPN_DISCONN;
2509 goto dorecs;
2510 }
2511 m = mp;
2512 if (slp->ns_rawend) {
2513 slp->ns_rawend->m_next = m;
2514 slp->ns_cc += 1000000000 - auio.uio_resid;
2515 } else {
2516 slp->ns_raw = m;
2517 slp->ns_cc = 1000000000 - auio.uio_resid;
2518 }
2519 while (m->m_next)
2520 m = m->m_next;
2521 slp->ns_rawend = m;
2522
2523 /*
2524 * Now try and parse record(s) out of the raw stream data.
2525 */
2526 error = nfsrv_getstream(slp, waitflag);
2527 if (error) {
2528 if (error == EPERM)
2529 ns_nflag = SLPN_DISCONN;
2530 else
2531 ns_nflag = SLPN_NEEDQ;
2532 }
2533 } else {
2534 do {
2535 auio.uio_resid = 1000000000;
2536 flags = MSG_DONTWAIT | MSG_NEEDSA;
2537 nam = 0;
2538 mp = 0;
2539 error = soreceive(so, &nam, &auio, &mp,
2540 (struct mbuf **)0, &flags);
2541
2542 if (mp) {
2543 if (nam) {
2544 MGET(mhck, M_WAIT, MT_SONAME);
2545 mhck->m_len = nam->sa_len;
2546 sin = mtod(mhck, struct sockaddr_in *);
2547 bcopy(nam, sin, sizeof(struct sockaddr_in));
2548 mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
2549
2550 m = mhck;
2551 m->m_next = mp;
2552 } else
2553 m = mp;
2554 if (slp->ns_recend)
2555 slp->ns_recend->m_nextpkt = m;
2556 else
2557 slp->ns_rec = m;
2558 slp->ns_recend = m;
2559 m->m_nextpkt = (struct mbuf *)0;
2560 }
2561 if (nam) {
2562 FREE(nam, M_SONAME);
2563 }
2564 if (error) {
2565 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2566 && error != EWOULDBLOCK) {
2567 ns_nflag = SLPN_DISCONN;
2568 goto dorecs;
2569 }
2570 }
2571 } while (mp);
2572 }
2573
2574 /*
2575 * Now try and process the request records, non-blocking.
2576 */
2577dorecs:
2578 if (ns_nflag)
2579 slp->ns_nflag |= ns_nflag;
1c79356b 2580 if (waitflag == M_DONTWAIT &&
2581 (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
2582 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2583 nfsrv_wakenfsd(slp);
2584 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2585 }
2586}
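/*
 * (This routine is installed as the socket upcall by the nfssvc
 * add-socket path, roughly so->so_upcall = nfsrv_rcv together with
 * so->so_rcv.sb_flags |= SB_UPCALL, so it runs whenever the socket
 * layer queues new data.)
 */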
2587
2588/*
2589 * Try and extract an RPC request from the mbuf data list received on a
2590 * stream socket. The "waitflag" argument indicates whether or not it
2591 * can sleep.
2592 */
2593static int
2594nfsrv_getstream(slp, waitflag)
2595 register struct nfssvc_sock *slp;
2596 int waitflag;
2597{
2598 register struct mbuf *m, **mpp;
2599 register char *cp1, *cp2;
2600 register int len;
2601 struct mbuf *om, *m2, *recm;
2602 u_long recmark;
2603
2604 if (slp->ns_nflag & SLPN_GETSTREAM)
2605 panic("nfs getstream");
2606 slp->ns_nflag |= SLPN_GETSTREAM;
2607 for (;;) {
2608 if (slp->ns_reclen == 0) {
2609 if (slp->ns_cc < NFSX_UNSIGNED) {
2610 slp->ns_nflag &= ~SLPN_GETSTREAM;
2611 return (0);
2612 }
2613 m = slp->ns_raw;
2614 if (m->m_len >= NFSX_UNSIGNED) {
2615 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
2616 m->m_data += NFSX_UNSIGNED;
2617 m->m_len -= NFSX_UNSIGNED;
2618 } else {
2619 cp1 = (caddr_t)&recmark;
2620 cp2 = mtod(m, caddr_t);
2621 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2622 while (m->m_len == 0) {
2623 m = m->m_next;
2624 cp2 = mtod(m, caddr_t);
2625 }
2626 *cp1++ = *cp2++;
2627 m->m_data++;
2628 m->m_len--;
2629 }
2630 }
2631 slp->ns_cc -= NFSX_UNSIGNED;
2632 recmark = ntohl(recmark);
2633 slp->ns_reclen = recmark & ~0x80000000;
2634 if (recmark & 0x80000000)
2635 slp->ns_nflag |= SLPN_LASTFRAG;
2636 else
2637 slp->ns_nflag &= ~SLPN_LASTFRAG;
2638 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2639 slp->ns_nflag &= ~SLPN_GETSTREAM;
2640 return (EPERM);
2641 }
2642 }
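/*
 * The mark parsed above is the RPC record marking standard (RFC 1057):
 * a big-endian 32-bit word whose high bit flags the last fragment of a
 * record and whose low 31 bits give the fragment length; e.g. a mark of
 * 0x8000001c introduces a final 28-byte fragment.
 */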
2643
2644 /*
2645 * Now get the record part.
2646 *
2647 * Note that slp->ns_reclen may be 0. Linux sometimes
2648 * generates 0-length RPCs.
2649 */
2650 recm = NULL;
2651 if (slp->ns_cc == slp->ns_reclen) {
2652 recm = slp->ns_raw;
2653 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2654 slp->ns_cc = slp->ns_reclen = 0;
2655 } else if (slp->ns_cc > slp->ns_reclen) {
2656 len = 0;
2657 m = slp->ns_raw;
2658 om = (struct mbuf *)0;
2659 while (len < slp->ns_reclen) {
2660 if ((len + m->m_len) > slp->ns_reclen) {
2661 m2 = m_copym(m, 0, slp->ns_reclen - len,
2662 waitflag);
2663 if (m2) {
2664 if (om) {
2665 om->m_next = m2;
2666 recm = slp->ns_raw;
2667 } else
2668 recm = m2;
2669 m->m_data += slp->ns_reclen - len;
2670 m->m_len -= slp->ns_reclen - len;
2671 len = slp->ns_reclen;
2672 } else {
2673 slp->ns_nflag &= ~SLPN_GETSTREAM;
2674 return (EWOULDBLOCK);
2675 }
2676 } else if ((len + m->m_len) == slp->ns_reclen) {
2677 om = m;
2678 len += m->m_len;
2679 m = m->m_next;
2680 recm = slp->ns_raw;
2681 om->m_next = (struct mbuf *)0;
2682 } else {
2683 om = m;
2684 len += m->m_len;
2685 m = m->m_next;
2686 }
2687 }
2688 slp->ns_raw = m;
2689 slp->ns_cc -= len;
2690 slp->ns_reclen = 0;
2691 } else {
2692 slp->ns_nflag &= ~SLPN_GETSTREAM;
2693 return (0);
2694 }
2695
2696 /*
2697 * Accumulate the fragments into a record.
2698 */
2699 mpp = &slp->ns_frag;
2700 while (*mpp)
2701 mpp = &((*mpp)->m_next);
2702 *mpp = recm;
2703 if (slp->ns_nflag & SLPN_LASTFRAG) {
2704 if (slp->ns_recend)
2705 slp->ns_recend->m_nextpkt = slp->ns_frag;
2706 else
2707 slp->ns_rec = slp->ns_frag;
2708 slp->ns_recend = slp->ns_frag;
2709 slp->ns_frag = (struct mbuf *)0;
2710 }
2711 }
2712}
2713
2714/*
2715 * Parse an RPC header.
2716 */
2717int
2718nfsrv_dorec(slp, nfsd, ndp)
2719 register struct nfssvc_sock *slp;
2720 struct nfsd *nfsd;
2721 struct nfsrv_descript **ndp;
2722{
2723 register struct mbuf *m;
2724 register struct mbuf *nam;
2725 register struct nfsrv_descript *nd;
2726 int error;
2727
2728 *ndp = NULL;
2729 if ((slp->ns_flag & SLP_VALID) == 0 ||
2730 (m = slp->ns_rec) == (struct mbuf *)0)
2731 return (ENOBUFS);
2732 slp->ns_rec = m->m_nextpkt;
2733 if (slp->ns_rec)
2734 m->m_nextpkt = (struct mbuf *)0;
2735 else
2736 slp->ns_recend = (struct mbuf *)0;
2737 if (m->m_type == MT_SONAME) {
2738 nam = m;
2739 m = m->m_next;
2740 nam->m_next = NULL;
2741 } else
2742 nam = NULL;
2743 MALLOC_ZONE(nd, struct nfsrv_descript *,
2744 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2745 nd->nd_md = nd->nd_mrep = m;
2746 nd->nd_nam2 = nam;
2747 nd->nd_dpos = mtod(m, caddr_t);
2748 error = nfs_getreq(nd, nfsd, TRUE);
2749 if (error) {
2750 if (nam)
2751 m_freem(nam);
2752 FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
2753 return (error);
2754 }
2755 *ndp = nd;
2756 nfsd->nfsd_nd = nd;
2757 return (0);
2758}
2759
2760/*
2761 * Parse an RPC request
2762 * - verify it
2763 * - fill in the cred struct.
2764 */
2765int
2766nfs_getreq(nd, nfsd, has_header)
2767 register struct nfsrv_descript *nd;
2768 struct nfsd *nfsd;
2769 int has_header;
2770{
2771 register int len, i;
2772 register u_long *tl;
2773 register long t1;
2774 struct uio uio;
2775 struct iovec iov;
2776 caddr_t dpos, cp2, cp;
2777 u_long nfsvers, auth_type;
2778 uid_t nickuid;
2779 int error = 0, nqnfs = 0, ticklen;
2780 struct mbuf *mrep, *md;
2781 register struct nfsuid *nuidp;
2782 struct timeval tvin, tvout, now;
2783#if 0 /* until encrypted keys are implemented */
2784 NFSKERBKEYSCHED_T keys; /* stores key schedule */
2785#endif
2786
2787 mrep = nd->nd_mrep;
2788 md = nd->nd_md;
2789 dpos = nd->nd_dpos;
2790 if (has_header) {
2791 nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
2792 nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
2793 if (*tl++ != rpc_call) {
2794 m_freem(mrep);
2795 return (EBADRPC);
2796 }
2797 } else
2798 nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
2799 nd->nd_repstat = 0;
2800 nd->nd_flag = 0;
2801 if (*tl++ != rpc_vers) {
2802 nd->nd_repstat = ERPCMISMATCH;
2803 nd->nd_procnum = NFSPROC_NOOP;
2804 return (0);
2805 }
2806 if (*tl != nfs_prog) {
2807 if (*tl == nqnfs_prog)
2808 nqnfs++;
2809 else {
2810 nd->nd_repstat = EPROGUNAVAIL;
2811 nd->nd_procnum = NFSPROC_NOOP;
2812 return (0);
2813 }
2814 }
2815 tl++;
2816 nfsvers = fxdr_unsigned(u_long, *tl++);
2817 if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
2818 (nfsvers != NQNFS_VER3 && nqnfs)) {
2819 nd->nd_repstat = EPROGMISMATCH;
2820 nd->nd_procnum = NFSPROC_NOOP;
2821 return (0);
2822 }
2823 if (nqnfs)
2824 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
2825 else if (nfsvers == NFS_VER3)
2826 nd->nd_flag = ND_NFSV3;
2827 nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
2828 if (nd->nd_procnum == NFSPROC_NULL)
2829 return (0);
2830 if (nd->nd_procnum >= NFS_NPROCS ||
2831 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2832 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2833 nd->nd_repstat = EPROCUNAVAIL;
2834 nd->nd_procnum = NFSPROC_NOOP;
2835 return (0);
2836 }
2837 if ((nd->nd_flag & ND_NFSV3) == 0)
2838 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2839 auth_type = *tl++;
2840 len = fxdr_unsigned(int, *tl++);
2841 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2842 m_freem(mrep);
2843 return (EBADRPC);
2844 }
2845
2846 nd->nd_flag &= ~ND_KERBAUTH;
2847 /*
2848 * Handle auth_unix or auth_kerb.
2849 */
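/*
 * For reference, an AUTH_UNIX credential body (RFC 1057) is laid out:
 *
 *	stamp         - arbitrary id (skipped via the *++tl below)
 *	machinename   - opaque<255>, skipped here
 *	uid, gid
 *	gids          - counted array, at most RPCAUTH_UNIXGIDS entries
 *
 * followed by a verifier, which is skipped here.
 */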
2850 if (auth_type == rpc_auth_unix) {
2851 len = fxdr_unsigned(int, *++tl);
2852 if (len < 0 || len > NFS_MAXNAMLEN) {
2853 m_freem(mrep);
2854 return (EBADRPC);
2855 }
2856 nfsm_adv(nfsm_rndup(len));
2857 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2858 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2859 nd->nd_cr.cr_ref = 1;
2860 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2861 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2862 len = fxdr_unsigned(int, *tl);
2863 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
2864 m_freem(mrep);
2865 return (EBADRPC);
2866 }
2867 nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
2868 for (i = 1; i <= len; i++)
2869 if (i < NGROUPS)
2870 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2871 else
2872 tl++;
2873 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2874 if (nd->nd_cr.cr_ngroups > 1)
2875 nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
2876 len = fxdr_unsigned(int, *++tl);
2877 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2878 m_freem(mrep);
2879 return (EBADRPC);
2880 }
2881 if (len > 0)
2882 nfsm_adv(nfsm_rndup(len));
2883 } else if (auth_type == rpc_auth_kerb) {
2884 switch (fxdr_unsigned(int, *tl++)) {
2885 case RPCAKN_FULLNAME:
2886 ticklen = fxdr_unsigned(int, *tl);
2887 *((u_long *)nfsd->nfsd_authstr) = *tl;
2888 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2889 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2890 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
2891 m_freem(mrep);
2892 return (EBADRPC);
2893 }
2894 uio.uio_offset = 0;
2895 uio.uio_iov = &iov;
2896 uio.uio_iovcnt = 1;
2897 uio.uio_segflg = UIO_SYSSPACE;
2898 iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2899 iov.iov_len = RPCAUTH_MAXSIZ - 4;
2900 nfsm_mtouio(&uio, uio.uio_resid);
2901 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2902 if (*tl++ != rpc_auth_kerb ||
2903 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2904 printf("Bad kerb verifier\n");
2905 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2906 nd->nd_procnum = NFSPROC_NOOP;
2907 return (0);
2908 }
2909 nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2910 tl = (u_long *)cp;
2911 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2912 printf("Not fullname kerb verifier\n");
2913 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2914 nd->nd_procnum = NFSPROC_NOOP;
2915 return (0);
2916 }
2917 cp += NFSX_UNSIGNED;
2918 bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2919 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2920 nd->nd_flag |= ND_KERBFULL;
2921 nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2922 break;
2923 case RPCAKN_NICKNAME:
2924 if (len != 2 * NFSX_UNSIGNED) {
2925 printf("Kerb nickname short\n");
2926 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2927 nd->nd_procnum = NFSPROC_NOOP;
2928 return (0);
2929 }
2930 nickuid = fxdr_unsigned(uid_t, *tl);
2931 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2932 if (*tl++ != rpc_auth_kerb ||
2933 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2934 printf("Kerb nick verifier bad\n");
2935 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2936 nd->nd_procnum = NFSPROC_NOOP;
2937 return (0);
2938 }
2939 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2940 tvin.tv_sec = *tl++;
2941 tvin.tv_usec = *tl;
2942
2943 for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2944 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2945 if (nuidp->nu_cr.cr_uid == nickuid &&
2946 (!nd->nd_nam2 ||
2947 netaddr_match(NU_NETFAM(nuidp),
2948 &nuidp->nu_haddr, nd->nd_nam2)))
2949 break;
2950 }
2951 if (!nuidp) {
2952 nd->nd_repstat =
2953 (NFSERR_AUTHERR|AUTH_REJECTCRED);
2954 nd->nd_procnum = NFSPROC_NOOP;
2955 return (0);
2956 }
2957
2958 /*
2959 * Now, decrypt the timestamp using the session key
2960 * and validate it.
2961 */
2962#if NFSKERB
2963 XXX
2964#endif
2965
2966 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2967 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2968 microtime(&now);
2969 if (nuidp->nu_expire < now.tv_sec ||
2970 nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2971 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2972 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2973 nuidp->nu_expire = 0;
2974 nd->nd_repstat =
2975 (NFSERR_AUTHERR|AUTH_REJECTVERF);
2976 nd->nd_procnum = NFSPROC_NOOP;
2977 return (0);
2978 }
2979 nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2980 nd->nd_flag |= ND_KERBNICK;
2981 };
2982 } else {
2983 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2984 nd->nd_procnum = NFSPROC_NOOP;
2985 return (0);
2986 }
2987
2988 /*
2989 * For nqnfs, get piggybacked lease request.
2990 */
2991 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
2992 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2993 nd->nd_flag |= fxdr_unsigned(int, *tl);
2994 if (nd->nd_flag & ND_LEASE) {
2995 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2996 nd->nd_duration = fxdr_unsigned(int, *tl);
2997 } else
2998 nd->nd_duration = NQ_MINLEASE;
2999 } else
3000 nd->nd_duration = NQ_MINLEASE;
3001 nd->nd_md = md;
3002 nd->nd_dpos = dpos;
3003 return (0);
3004nfsmout:
3005 return (error);
3006}
3007
3008/*
3009 * Search for a sleeping nfsd and wake it up.
3010 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
3011 * running nfsds will go look for the work in the nfssvc_sock list.
3012 */
3013void
3014nfsrv_wakenfsd(slp)
3015 struct nfssvc_sock *slp;
3016{
3017 register struct nfsd *nd;
3018
3019 if ((slp->ns_flag & SLP_VALID) == 0)
3020 return;
3021 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
3022 if (nd->nfsd_flag & NFSD_WAITING) {
3023 nd->nfsd_flag &= ~NFSD_WAITING;
3024 if (nd->nfsd_slp)
3025 panic("nfsd wakeup");
3026 slp->ns_sref++;
3027 nd->nfsd_slp = slp;
3028 wakeup((caddr_t)nd);
3029 return;
3030 }
3031 }
3032 slp->ns_flag |= SLP_DOREC;
3033 nfsd_head_flag |= NFSD_CHECKSLP;
3034}
3035#endif /* NFS_NOSERVER */
3036
3037static int
3038nfs_msg(p, server, msg, error)
3039 struct proc *p;
3040 const char *server, *msg;
3041 int error;
3042{
3043 tpr_t tpr;
3044
3045 if (p)
3046 tpr = tprintf_open(p);
3047 else
3048 tpr = NULL;
3049 if (error)
3050 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
3051 error);
3052 else
3053 tprintf(tpr, "nfs server %s: %s\n", server, msg);
3054 tprintf_close(tpr);
3055 return (0);
3056}
3057
3058void
3059nfs_down(rep, nmp, proc, msg, error, flags)
3060 struct nfsreq *rep;
3061 struct nfsmount *nmp;
3062 struct proc *proc;
3063 const char *msg;
3064 int error, flags;
3065{
3066 if (nmp == NULL)
3067 return;
3068 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
3069 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3070 VQ_NOTRESP, 0);
3071 nmp->nm_state |= NFSSTA_TIMEO;
3072 }
3073 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
3074 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3075 VQ_NOTRESPLOCK, 0);
3076 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3077 }
3078 if (rep)
3079 rep->r_flags |= R_TPRINTFMSG;
3080 nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
3081}
3082
3083void
3084nfs_up(rep, nmp, proc, msg, flags)
3085 struct nfsreq *rep;
3086 struct nfsmount *nmp;
3087 struct proc *proc;
3088 const char *msg;
3089 int flags;
3090{
3091 if (nmp == NULL)
3092 return;
3093 if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0)
3094 nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
3095 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
3096 nmp->nm_state &= ~NFSSTA_TIMEO;
3097 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3098 VQ_NOTRESP, 1);
3099 }
3100 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
3101 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3102 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3103 VQ_NOTRESPLOCK, 1);
3104 }
3105}
3106
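/*
 * The VQ_NOTRESP / VQ_NOTRESPLOCK events used by nfs_down and nfs_up
 * above are vfs_event_signal() notifications keyed on the mount's fsid,
 * posted with 0 when the server stops responding and with 1 when it
 * recovers; the NFSSTA_TIMEO / NFSSTA_LOCKTIMEO bits guard against
 * posting the same transition twice.
 */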