]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_socket.c
xnu-517.7.7.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_socket.c
CommitLineData
1c79356b 1/*
e5568f75 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1989, 1991, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
60 */
61
62/*
63 * Socket operations for use by nfs
64 */
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/proc.h>
69#include <sys/mount.h>
70#include <sys/kernel.h>
71#include <sys/mbuf.h>
72#include <sys/malloc.h>
73#include <sys/vnode.h>
74#include <sys/domain.h>
75#include <sys/protosw.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/syslog.h>
79#include <sys/tprintf.h>
80#include <machine/spl.h>
81
82#include <sys/time.h>
83#include <kern/clock.h>
4a249263
A
84#include <kern/task.h>
85#include <kern/thread.h>
9bccf70c 86#include <sys/user.h>
1c79356b
A
87
88#include <netinet/in.h>
89#include <netinet/tcp.h>
90
91#include <nfs/rpcv2.h>
92#include <nfs/nfsproto.h>
93#include <nfs/nfs.h>
94#include <nfs/xdr_subs.h>
95#include <nfs/nfsm_subs.h>
96#include <nfs/nfsmount.h>
97#include <nfs/nfsnode.h>
98#include <nfs/nfsrtt.h>
99#include <nfs/nqnfs.h>
100
fa4905b1
A
101#include <sys/kdebug.h>
102
103#define FSDBG(A, B, C, D, E) \
104 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
105 (int)(B), (int)(C), (int)(D), (int)(E), 0)
106#define FSDBG_TOP(A, B, C, D, E) \
107 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
108 (int)(B), (int)(C), (int)(D), (int)(E), 0)
109#define FSDBG_BOT(A, B, C, D, E) \
110 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
111 (int)(B), (int)(C), (int)(D), (int)(E), 0)
112
1c79356b
A
113#define TRUE 1
114#define FALSE 0
115
/*
 * Estimate the retransmit timeout (rto) for an nfs rpc sent via an
 * unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write     - A+4D
 * other           - nm_timeo
 *
 * NFS_RTO(n, t):  n is a pointer to the mount structure, t is the timer
 *                 class (0 selects nm_timeo, otherwise slot t - 1 of the
 *                 nm_srtt/nm_sdrtt arrays is used).  Every use of `t' is
 *                 parenthesized so that an expression argument indexes the
 *                 intended slot.
 * NFS_SRTT(r) / NFS_SDRTT(r): the srtt / sdrtt slot selected by proct[]
 *                 for request r; both expand to lvalues.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
135/*
136 * External data, mostly RPC constants in XDR form
137 */
138extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
139 rpc_msgaccepted, rpc_call, rpc_autherr,
140 rpc_auth_kerb;
141extern u_long nfs_prog, nqnfs_prog;
142extern time_t nqnfsstarttime;
143extern struct nfsstats nfsstats;
144extern int nfsv3_procid[NFS_NPROCS];
145extern int nfs_ticks;
fa4905b1 146extern u_long nfs_xidwrap;
1c79356b
A
147
148/*
149 * Defines which timer to use for the procnum.
150 * 0 - default
151 * 1 - getattr
152 * 2 - lookup
153 * 3 - read
154 * 4 - write
155 */
156static int proct[NFS_NPROCS] = {
157 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
158 0, 0, 0,
159};
160
161/*
162 * There is a congestion window for outstanding rpcs maintained per mount
163 * point. The cwnd size is adjusted in roughly the way that:
164 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
165 * SIGCOMM '88". ACM, August 1988.
166 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
167 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
168 * of rpcs is in progress.
169 * (The sent count and cwnd are scaled for integer arith.)
170 * Variants of "slow start" were tried and were found to be too much of a
171 * performance hit (ave. rtt 3 times larger),
172 * I suspect due to the large rtt that nfs rpcs have.
173 */
174#define NFS_CWNDSCALE 256
175#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
176static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
177int nfsrtton = 0;
178struct nfsrtt nfsrtt;
179
55e303ae 180static int nfs_msg __P((struct proc *, const char *, const char *, int));
1c79356b 181static int nfs_rcvlock __P((struct nfsreq *));
55e303ae 182static void nfs_rcvunlock __P((struct nfsreq *));
1c79356b
A
183static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
184 struct mbuf **mp));
185static int nfs_reconnect __P((struct nfsreq *rep));
55e303ae
A
186static void nfs_repbusy(struct nfsreq *rep);
187static struct nfsreq * nfs_repnext(struct nfsreq *rep);
188static void nfs_repdequeue(struct nfsreq *rep);
4a249263
A
189
190/* XXX */
191boolean_t current_thread_aborted(void);
192kern_return_t thread_terminate(thread_act_t);
193
1c79356b
A
194#ifndef NFS_NOSERVER
195static int nfsrv_getstream __P((struct nfssvc_sock *,int));
196
197int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
198 struct nfssvc_sock *slp,
199 struct proc *procp,
200 struct mbuf **mreqp)) = {
201 nfsrv_null,
202 nfsrv_getattr,
203 nfsrv_setattr,
204 nfsrv_lookup,
205 nfsrv3_access,
206 nfsrv_readlink,
207 nfsrv_read,
208 nfsrv_write,
209 nfsrv_create,
210 nfsrv_mkdir,
211 nfsrv_symlink,
212 nfsrv_mknod,
213 nfsrv_remove,
214 nfsrv_rmdir,
215 nfsrv_rename,
216 nfsrv_link,
217 nfsrv_readdir,
218 nfsrv_readdirplus,
219 nfsrv_statfs,
220 nfsrv_fsinfo,
221 nfsrv_pathconf,
222 nfsrv_commit,
223 nqnfsrv_getlease,
224 nqnfsrv_vacated,
225 nfsrv_noop,
226 nfsrv_noop
227};
228#endif /* NFS_NOSERVER */
229
fa4905b1
A
230/*
231 * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
232 * But some of this code may prove useful someday...
233 */
234#undef NFSDIAG
1c79356b
A
235#if NFSDIAG
236int nfstraceindx = 0;
237struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
238
239#define NFSTRACESUSPENDERS
240#ifdef NFSTRACESUSPENDERS
241uint nfstracemask = 0xfff00200;
242int nfstracexid = -1;
243uint onfstracemask = 0;
244int nfstracesuspend = -1;
245#define NFSTRACE_SUSPEND \
246 { \
247 if (nfstracemask) { \
248 onfstracemask = nfstracemask; \
249 nfstracemask = 0; \
250 } \
251 }
252#define NFSTRACE_RESUME \
253 { \
254 nfstracesuspend = -1; \
255 if (!nfstracemask) \
256 nfstracemask = onfstracemask; \
257 }
258#define NFSTRACE_STARTSUSPENDCOUNTDOWN \
259 { \
260 nfstracesuspend = (nfstraceindx+100) % NFSTBUFSIZ; \
261 }
262#define NFSTRACE_SUSPENDING (nfstracesuspend != -1)
263#define NFSTRACE_SUSPENSEOVER \
264 (nfstracesuspend > 100 ? \
265 (nfstraceindx >= nfstracesuspend || \
266 nfstraceindx < nfstracesuspend - 100) : \
267 (nfstraceindx >= nfstracesuspend && \
268 nfstraceindx < nfstracesuspend + 8192 - 100))
269#else
270uint nfstracemask = 0;
271#endif /* NFSTRACESUSPENDERS */
272
273int nfsprnttimo = 1;
274
275int nfsodata[1024];
276int nfsoprocnum, nfsolen;
277int nfsbt[32], nfsbtlen;
278
279#if defined(__ppc__)
280int
281backtrace(int *where, int size)
282{
283 int register sp, *fp, numsaved;
284
285 __asm__ volatile("mr %0,r1" : "=r" (sp));
286
287 fp = (int *)*((int *)sp);
288 size /= sizeof(int);
289 for (numsaved = 0; numsaved < size; numsaved++) {
290 *where++ = fp[2];
291 if ((int)fp <= 0)
292 break;
293 fp = (int *)*fp;
294 }
295 return (numsaved);
296}
297#elif defined(__i386__)
298int
299backtrace()
300{
301 return (0); /* Till someone implements a real routine */
302}
303#else
304#error architecture not implemented.
305#endif
306
307void
308nfsdup(struct nfsreq *rep)
309{
310 int *ip, i, first = 1, end;
311 char *s, b[240];
312 struct mbuf *mb;
313
314 if ((nfs_debug & NFS_DEBUG_DUP) == 0)
315 return;
316 /* last mbuf in chain will be nfs content */
317 for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next)
318 ;
319 if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen &&
320 !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) {
321 s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid,
322 rep->r_procnum);
323 end = (int)(VTONFS(rep->r_vp)->n_fhp);
324 ip = (int *)(end & ~3);
325 end += VTONFS(rep->r_vp)->n_fhsize;
326 while ((int)ip < end) {
327 i = *ip++;
328 if (first) { /* avoid leading zeroes */
329 if (i == 0)
330 continue;
331 first = 0;
332 s += sprintf(s, "%x", i);
333 } else
334 s += sprintf(s, "%08x", i);
335 }
336 if (first)
337 sprintf(s, "%x", 0);
338 else /* eliminate trailing zeroes */
339 while (*--s == '0')
340 *s = 0;
341 /*
342 * set a breakpoint here and you can view the
343 * current backtrace and the one saved in nfsbt
344 */
345 kprintf("%s\n", b);
346 }
347 nfsoprocnum = rep->r_procnum;
348 nfsolen = mb->m_len;
349 bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len);
350 nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt));
351}
352#endif /* NFSDIAG */
353
4a249263
A
354
355/*
356 * attempt to bind a socket to a reserved port
357 */
358static int
359nfs_bind_resv(struct nfsmount *nmp)
360{
361 struct socket *so = nmp->nm_so;
362 struct sockaddr_in sin;
363 int error;
364 u_short tport;
365
366 if (!so)
367 return (EINVAL);
368
369 sin.sin_len = sizeof (struct sockaddr_in);
370 sin.sin_family = AF_INET;
371 sin.sin_addr.s_addr = INADDR_ANY;
372 tport = IPPORT_RESERVED - 1;
373 sin.sin_port = htons(tport);
374
375 while (((error = sobind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
376 (--tport > IPPORT_RESERVED / 2))
377 sin.sin_port = htons(tport);
378 return (error);
379}
380
381/*
382 * variables for managing the nfs_bind_resv_thread
383 */
384int nfs_resv_mounts = 0;
385static int nfs_bind_resv_thread_state = 0;
386#define NFS_BIND_RESV_THREAD_STATE_INITTED 1
387#define NFS_BIND_RESV_THREAD_STATE_RUNNING 2
388static struct slock nfs_bind_resv_slock;
389struct nfs_bind_resv_request {
390 TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;
391 struct nfsmount *brr_nmp;
392 int brr_error;
393};
394static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;
395
396/*
397 * thread to handle any reserved port bind requests
398 */
399static void
400nfs_bind_resv_thread(void)
401{
402 struct nfs_bind_resv_request *brreq;
403 boolean_t funnel_state;
404
405 funnel_state = thread_funnel_set(network_flock, TRUE);
406 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
407
408 while (nfs_resv_mounts > 0) {
409 simple_lock(&nfs_bind_resv_slock);
410 while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
411 TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
412 simple_unlock(&nfs_bind_resv_slock);
413 brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
414 wakeup(brreq);
415 simple_lock(&nfs_bind_resv_slock);
416 }
417 simple_unlock(&nfs_bind_resv_slock);
418 (void)tsleep((caddr_t)&nfs_bind_resv_request_queue, PSOCK,
419 "nfs_bind_resv_request_queue", 0);
420 }
421
422 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
423 (void) thread_funnel_set(network_flock, funnel_state);
424 (void) thread_terminate(current_act());
425}
426
427int
428nfs_bind_resv_thread_wake(void)
429{
430 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING)
431 return (EIO);
432 wakeup(&nfs_bind_resv_request_queue);
433 return (0);
434}
435
436/*
437 * underprivileged procs call this to request nfs_bind_resv_thread
438 * to perform the reserved port binding for them.
439 */
440static int
441nfs_bind_resv_nopriv(struct nfsmount *nmp)
442{
443 struct nfs_bind_resv_request brreq;
444 int error;
445
446 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
447 if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
448 simple_lock_init(&nfs_bind_resv_slock);
449 TAILQ_INIT(&nfs_bind_resv_request_queue);
450 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
451 }
452 kernel_thread(kernel_task, nfs_bind_resv_thread);
453 nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
454 }
455
456 brreq.brr_nmp = nmp;
457 brreq.brr_error = 0;
458
459 simple_lock(&nfs_bind_resv_slock);
460 TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
461 simple_unlock(&nfs_bind_resv_slock);
462
463 error = nfs_bind_resv_thread_wake();
464 if (error) {
465 TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
466 /* Note: we might be able to simply restart the thread */
467 return (error);
468 }
469
470 (void) tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);
471
472 return (brreq.brr_error);
473}
474
1c79356b
A
475/*
476 * Initialize sockets and congestion for a new NFS connection.
477 * We do not free the sockaddr if error.
478 */
479int
480nfs_connect(nmp, rep)
4a249263 481 struct nfsmount *nmp;
1c79356b
A
482 struct nfsreq *rep;
483{
4a249263 484 struct socket *so;
1c79356b
A
485 int s, error, rcvreserve, sndreserve;
486 struct sockaddr *saddr;
1c79356b
A
487
488 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
489 nmp->nm_so = (struct socket *)0;
490 saddr = mtod(nmp->nm_nam, struct sockaddr *);
491 error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
492 nmp->nm_soproto);
493 if (error) {
494 goto bad;
495 }
496 so = nmp->nm_so;
497 nmp->nm_soflags = so->so_proto->pr_flags;
498
499 /*
500 * Some servers require that the client port be a reserved port number.
501 */
502 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
4a249263
A
503 struct proc *p;
504 /*
505 * sobind() requires current_proc() to have superuser privs.
506 * If this bind is part of a reconnect, and the current proc
507 * doesn't have superuser privs, we hand the sobind() off to
508 * a kernel thread to process.
509 */
510 if ((nmp->nm_state & NFSSTA_MOUNTED) &&
511 (p = current_proc()) && suser(p->p_ucred, &p->p_acflag)) {
512 /* request nfs_bind_resv_thread() to do bind */
513 error = nfs_bind_resv_nopriv(nmp);
514 } else {
515 error = nfs_bind_resv(nmp);
1c79356b 516 }
4a249263
A
517 if (error)
518 goto bad;
1c79356b
A
519 }
520
521 /*
522 * Protocols that do not require connections may be optionally left
523 * unconnected for servers that reply from a port other than NFS_PORT.
524 */
525 if (nmp->nm_flag & NFSMNT_NOCONN) {
526 if (nmp->nm_soflags & PR_CONNREQUIRED) {
527 error = ENOTCONN;
528 goto bad;
529 }
530 } else {
531 error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *));
532 if (error) {
533 goto bad;
534 }
535
536 /*
537 * Wait for the connection to complete. Cribbed from the
538 * connect system call but with the wait timing out so
539 * that interruptible mounts don't hang here for a long time.
540 */
541 s = splnet();
542 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
543 (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
544 "nfscon", 2 * hz);
545 if ((so->so_state & SS_ISCONNECTING) &&
546 so->so_error == 0 && rep &&
547 (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
548 so->so_state &= ~SS_ISCONNECTING;
549 splx(s);
550 goto bad;
551 }
552 }
553 if (so->so_error) {
554 error = so->so_error;
555 so->so_error = 0;
556 splx(s);
557 goto bad;
558 }
559 splx(s);
560 }
55e303ae
A
561 /*
562 * Always time out on recieve, this allows us to reconnect the
563 * socket to deal with network changes.
564 */
565 so->so_rcv.sb_timeo = (2 * hz);
1c79356b 566 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
1c79356b
A
567 so->so_snd.sb_timeo = (5 * hz);
568 } else {
1c79356b
A
569 so->so_snd.sb_timeo = 0;
570 }
571 if (nmp->nm_sotype == SOCK_DGRAM) {
55e303ae
A
572 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
573 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
574 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
1c79356b 575 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
55e303ae
A
576 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
577 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
578 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
1c79356b
A
579 } else {
580 if (nmp->nm_sotype != SOCK_STREAM)
581 panic("nfscon sotype");
582
583 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
584 struct sockopt sopt;
585 int val;
586
587 bzero(&sopt, sizeof sopt);
55e303ae 588 sopt.sopt_dir = SOPT_SET;
1c79356b
A
589 sopt.sopt_level = SOL_SOCKET;
590 sopt.sopt_name = SO_KEEPALIVE;
591 sopt.sopt_val = &val;
592 sopt.sopt_valsize = sizeof val;
593 val = 1;
594 sosetopt(so, &sopt);
595 }
596 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
597 struct sockopt sopt;
598 int val;
599
600 bzero(&sopt, sizeof sopt);
55e303ae 601 sopt.sopt_dir = SOPT_SET;
1c79356b
A
602 sopt.sopt_level = IPPROTO_TCP;
603 sopt.sopt_name = TCP_NODELAY;
604 sopt.sopt_val = &val;
605 sopt.sopt_valsize = sizeof val;
606 val = 1;
607 sosetopt(so, &sopt);
608 }
609
55e303ae
A
610 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
611 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
612 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
1c79356b
A
613 }
614
55e303ae
A
615 if (sndreserve > NFS_MAXSOCKBUF)
616 sndreserve = NFS_MAXSOCKBUF;
617 if (rcvreserve > NFS_MAXSOCKBUF)
618 rcvreserve = NFS_MAXSOCKBUF;
1c79356b
A
619 error = soreserve(so, sndreserve, rcvreserve);
620 if (error) {
621 goto bad;
622 }
623 so->so_rcv.sb_flags |= SB_NOINTR;
624 so->so_snd.sb_flags |= SB_NOINTR;
625
626 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
627
628 /* Initialize other non-zero congestion variables */
629 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
630 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
631 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
632 nmp->nm_sdrtt[3] = 0;
633 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
634 nmp->nm_sent = 0;
55e303ae 635 FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
1c79356b
A
636 nmp->nm_timeouts = 0;
637 return (0);
638
639bad:
640 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
641 nfs_disconnect(nmp);
642 return (error);
643}
644
645/*
646 * Reconnect routine:
647 * Called when a connection is broken on a reliable protocol.
648 * - clean up the old socket
649 * - nfs_connect() again
650 * - set R_MUSTRESEND for all outstanding requests on mount point
651 * If this fails the mount point is DEAD!
652 * nb: Must be called with the nfs_sndlock() set on the mount point.
653 */
654static int
655nfs_reconnect(rep)
656 register struct nfsreq *rep;
657{
658 register struct nfsreq *rp;
659 register struct nfsmount *nmp = rep->r_nmp;
660 int error;
661
662 nfs_disconnect(nmp);
663 while ((error = nfs_connect(nmp, rep))) {
664 if (error == EINTR || error == ERESTART)
665 return (EINTR);
55e303ae
A
666 if (error == EIO)
667 return (EIO);
e5568f75
A
668 nfs_down(rep, rep->r_nmp, rep->r_procp, "can not connect",
669 error, NFSSTA_TIMEO);
4a249263
A
670 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
671 /* we're not yet completely mounted and */
672 /* we can't reconnect, so we fail */
673 return (error);
674 }
675 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
676 return (error);
1c79356b
A
677 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
678 }
679
680 NFS_DPF(DUP, ("nfs_reconnect RESEND\n"));
681 /*
682 * Loop through outstanding request list and fix up all requests
683 * on old socket.
684 */
55e303ae 685 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
1c79356b
A
686 if (rp->r_nmp == nmp)
687 rp->r_flags |= R_MUSTRESEND;
688 }
689 return (0);
690}
691
692/*
693 * NFS disconnect. Clean up and unlink.
694 */
695void
696nfs_disconnect(nmp)
697 register struct nfsmount *nmp;
698{
699 register struct socket *so;
700
701 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
702 if (nmp->nm_so) {
703 so = nmp->nm_so;
704 nmp->nm_so = (struct socket *)0;
705 soshutdown(so, 2);
706 soclose(so);
707 }
708 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
709}
710
711/*
712 * This is the nfs send routine. For connection based socket types, it
713 * must be called with an nfs_sndlock() on the socket.
714 * "rep == NULL" indicates that it has been called from a server.
715 * For the client side:
716 * - return EINTR if the RPC is terminated, 0 otherwise
717 * - set R_MUSTRESEND if the send fails for any reason
718 * - do any cleanup required by recoverable socket errors (???)
719 * For the server side:
720 * - return EINTR or ERESTART if interrupted by a signal
721 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
722 * - do any cleanup required by recoverable socket errors (???)
723 */
724int
725nfs_send(so, nam, top, rep)
726 register struct socket *so;
727 struct mbuf *nam;
728 register struct mbuf *top;
729 struct nfsreq *rep;
730{
731 struct sockaddr *sendnam;
55e303ae 732 int error, error2, soflags, flags;
1c79356b
A
733 int xidqueued = 0;
734 struct nfsreq *rp;
735 char savenametolog[MNAMELEN];
736
737 if (rep) {
55e303ae
A
738 error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
739 if (error) {
1c79356b 740 m_freem(top);
55e303ae 741 return (error);
1c79356b
A
742 }
743 if ((so = rep->r_nmp->nm_so) == NULL) {
744 rep->r_flags |= R_MUSTRESEND;
745 m_freem(top);
746 return (0);
747 }
748 rep->r_flags &= ~R_MUSTRESEND;
749 soflags = rep->r_nmp->nm_soflags;
55e303ae 750 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
1c79356b
A
751 if (rp == rep)
752 break;
753 if (rp)
754 xidqueued = rp->r_xid;
755 } else
756 soflags = so->so_proto->pr_flags;
757 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) ||
758 (nam == 0))
759 sendnam = (struct sockaddr *)0;
760 else
761 sendnam = mtod(nam, struct sockaddr *);
762
763 if (so->so_type == SOCK_SEQPACKET)
764 flags = MSG_EOR;
765 else
766 flags = 0;
767
768#if NFSDIAG
769 if (rep)
770 nfsdup(rep);
771#endif
772 /*
773 * Save the name here in case mount point goes away when we switch
774 * funnels. The name is using local stack and is large, but don't
775 * want to block if we malloc.
776 */
777 if (rep)
778 strncpy(savenametolog,
779 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
780 MNAMELEN);
781 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
782 error = sosend(so, sendnam, (struct uio *)0, top,
783 (struct mbuf *)0, flags);
784 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
785
786 if (error) {
787 if (rep) {
788 if (xidqueued) {
55e303ae 789 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
1c79356b
A
790 if (rp == rep && rp->r_xid == xidqueued)
791 break;
792 if (!rp)
793 panic("nfs_send: error %d xid %x gone",
794 error, xidqueued);
795 }
796 log(LOG_INFO, "nfs send error %d for server %s\n",
797 error, savenametolog);
798 /*
799 * Deal with errors for the client side.
800 */
55e303ae
A
801 error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
802 if (error2) {
803 error = error2;
804 } else {
1c79356b
A
805 rep->r_flags |= R_MUSTRESEND;
806 NFS_DPF(DUP,
807 ("nfs_send RESEND error=%d\n", error));
808 }
809 } else
810 log(LOG_INFO, "nfsd send error %d\n", error);
811
812 /*
813 * Handle any recoverable (soft) socket errors here. (???)
814 */
55e303ae
A
815 if (error != EINTR && error != ERESTART && error != EIO &&
816 error != EWOULDBLOCK && error != EPIPE) {
1c79356b 817 error = 0;
55e303ae 818 }
1c79356b
A
819 }
820 return (error);
821}
822
823/*
824 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
825 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
826 * Mark and consolidate the data into a new mbuf list.
827 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
828 * small mbufs.
829 * For SOCK_STREAM we must be very careful to read an entire record once
830 * we have read any of it, even if the system call has been interrupted.
831 */
832static int
833nfs_receive(rep, aname, mp)
834 register struct nfsreq *rep;
835 struct mbuf **aname;
836 struct mbuf **mp;
837{
838 register struct socket *so;
839 struct uio auio;
840 struct iovec aio;
841 register struct mbuf *m;
842 struct mbuf *control;
843 u_long len;
844 struct sockaddr **getnam;
845 struct sockaddr *tmp_nam;
846 struct mbuf *mhck;
847 struct sockaddr_in *sin;
55e303ae 848 int error, error2, sotype, rcvflg;
1c79356b
A
849 struct proc *p = current_proc(); /* XXX */
850
851 /*
852 * Set up arguments for soreceive()
853 */
854 *mp = (struct mbuf *)0;
855 *aname = (struct mbuf *)0;
856 sotype = rep->r_nmp->nm_sotype;
857
858 /*
859 * For reliable protocols, lock against other senders/receivers
860 * in case a reconnect is necessary.
861 * For SOCK_STREAM, first get the Record Mark to find out how much
862 * more there is to get.
863 * We must lock the socket against other receivers
864 * until we have an entire rpc request/reply.
865 */
866 if (sotype != SOCK_DGRAM) {
55e303ae 867 error = nfs_sndlock(rep);
1c79356b
A
868 if (error)
869 return (error);
870tryagain:
871 /*
872 * Check for fatal errors and resending request.
873 */
874 /*
875 * Ugh: If a reconnect attempt just happened, nm_so
876 * would have changed. NULL indicates a failed
877 * attempt that has essentially shut down this
878 * mount point.
879 */
55e303ae
A
880 if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
881 nfs_sndunlock(rep);
882 if (error)
883 return (error);
1c79356b
A
884 return (EINTR);
885 }
886 so = rep->r_nmp->nm_so;
887 if (!so) {
888 error = nfs_reconnect(rep);
889 if (error) {
55e303ae 890 nfs_sndunlock(rep);
1c79356b
A
891 return (error);
892 }
893 goto tryagain;
894 }
895 while (rep->r_flags & R_MUSTRESEND) {
896 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
897 nfsstats.rpcretries++;
898 NFS_DPF(DUP,
899 ("nfs_receive RESEND %s\n",
900 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname));
901 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
902 /*
903 * we also hold rcv lock so rep is still
904 * legit this point
905 */
906 if (error) {
907 if (error == EINTR || error == ERESTART ||
908 (error = nfs_reconnect(rep))) {
55e303ae 909 nfs_sndunlock(rep);
1c79356b
A
910 return (error);
911 }
912 goto tryagain;
913 }
914 }
55e303ae 915 nfs_sndunlock(rep);
1c79356b
A
916 if (sotype == SOCK_STREAM) {
917 aio.iov_base = (caddr_t) &len;
918 aio.iov_len = sizeof(u_long);
919 auio.uio_iov = &aio;
920 auio.uio_iovcnt = 1;
921 auio.uio_segflg = UIO_SYSSPACE;
922 auio.uio_rw = UIO_READ;
923 auio.uio_offset = 0;
924 auio.uio_resid = sizeof(u_long);
925 auio.uio_procp = p;
926 do {
927 rcvflg = MSG_WAITALL;
928 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
929 error = soreceive(so, (struct sockaddr **)0, &auio,
930 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
55e303ae 931 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1c79356b
A
932 if (!rep->r_nmp) /* if unmounted then bailout */
933 goto shutout;
934 if (error == EWOULDBLOCK && rep) {
55e303ae
A
935 error2 = nfs_sigintr(rep->r_nmp, rep, p);
936 if (error2)
937 error = error2;
1c79356b
A
938 }
939 } while (error == EWOULDBLOCK);
940 if (!error && auio.uio_resid > 0) {
941 log(LOG_INFO,
942 "short receive (%d/%d) from nfs server %s\n",
943 sizeof(u_long) - auio.uio_resid,
944 sizeof(u_long),
945 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
946 error = EPIPE;
947 }
948 if (error)
949 goto errout;
950 len = ntohl(len) & ~0x80000000;
951 /*
952 * This is SERIOUS! We are out of sync with the sender
953 * and forcing a disconnect/reconnect is all I can do.
954 */
955 if (len > NFS_MAXPACKET) {
956 log(LOG_ERR, "%s (%d) from nfs server %s\n",
957 "impossible packet length",
958 len,
959 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
960 error = EFBIG;
961 goto errout;
962 }
963 auio.uio_resid = len;
964
965 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
966 do {
967 rcvflg = MSG_WAITALL;
968 error = soreceive(so, (struct sockaddr **)0,
969 &auio, mp, (struct mbuf **)0, &rcvflg);
970 if (!rep->r_nmp) /* if unmounted then bailout */ {
971 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
972 goto shutout;
973 }
974 } while (error == EWOULDBLOCK || error == EINTR ||
975 error == ERESTART);
976
977 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
978
979 if (!error && auio.uio_resid > 0) {
980 log(LOG_INFO,
981 "short receive (%d/%d) from nfs server %s\n",
982 len - auio.uio_resid, len,
983 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
984 error = EPIPE;
985 }
986 } else {
987 /*
988 * NB: Since uio_resid is big, MSG_WAITALL is ignored
989 * and soreceive() will return when it has either a
990 * control msg or a data msg.
991 * We have no use for control msg., but must grab them
992 * and then throw them away so we know what is going
993 * on.
994 */
995 auio.uio_resid = len = 100000000; /* Anything Big */
996 auio.uio_procp = p;
997
998 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
999 do {
e5568f75 1000 control = NULL;
1c79356b
A
1001 rcvflg = 0;
1002 error = soreceive(so, (struct sockaddr **)0,
1003 &auio, mp, &control, &rcvflg);
55e303ae
A
1004 if (control)
1005 m_freem(control);
1c79356b
A
1006 if (!rep->r_nmp) /* if unmounted then bailout */ {
1007 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1008 goto shutout;
1009 }
1c79356b 1010 if (error == EWOULDBLOCK && rep) {
55e303ae
A
1011 error2 = nfs_sigintr(rep->r_nmp, rep, p);
1012 if (error2) {
1013 thread_funnel_switch(NETWORK_FUNNEL,
1014 KERNEL_FUNNEL);
1015 return (error2);
1c79356b
A
1016 }
1017 }
1018 } while (error == EWOULDBLOCK ||
1019 (!error && *mp == NULL && control));
1020
1021 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1022
1023 if ((rcvflg & MSG_EOR) == 0)
1024 printf("Egad!!\n");
1025 if (!error && *mp == NULL)
1026 error = EPIPE;
1027 len -= auio.uio_resid;
1028 }
1029errout:
1030 if (error && error != EINTR && error != ERESTART) {
1031 m_freem(*mp);
1032 *mp = (struct mbuf *)0;
1033 if (error != EPIPE)
1034 log(LOG_INFO,
1035 "receive error %d from nfs server %s\n",
1036 error,
1037 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
55e303ae 1038 error = nfs_sndlock(rep);
1c79356b
A
1039 if (!error)
1040 error = nfs_reconnect(rep);
1041 if (!error)
1042 goto tryagain;
1043 }
1044 } else {
55e303ae
A
1045 /*
1046 * We could have failed while rebinding the datagram socket
1047 * so we need to attempt to rebind here.
1048 */
1049 if ((so = rep->r_nmp->nm_so) == NULL) {
1050 error = nfs_sndlock(rep);
1051 if (!error) {
1052 error = nfs_reconnect(rep);
1053 nfs_sndunlock(rep);
1054 }
1055 if (error)
1056 return (error);
1057 if (!rep->r_nmp) /* if unmounted then bailout */
1058 return (ENXIO);
1059 so = rep->r_nmp->nm_so;
1060 }
1c79356b
A
1061 if (so->so_state & SS_ISCONNECTED)
1062 getnam = (struct sockaddr **)0;
1063 else
1064 getnam = &tmp_nam;;
1065 auio.uio_resid = len = 1000000;
1066 auio.uio_procp = p;
1067
1068 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1069 do {
1070 rcvflg = 0;
1071 error = soreceive(so, getnam, &auio, mp,
1072 (struct mbuf **)0, &rcvflg);
1073
1074 if ((getnam) && (*getnam)) {
1075 MGET(mhck, M_WAIT, MT_SONAME);
1076 mhck->m_len = (*getnam)->sa_len;
1077 sin = mtod(mhck, struct sockaddr_in *);
1078 bcopy(*getnam, sin, sizeof(struct sockaddr_in));
1079 mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
1080 FREE(*getnam, M_SONAME);
1081 *aname = mhck;
1082 }
55e303ae
A
1083 if (!rep->r_nmp) /* if unmounted then bailout */
1084 goto dgramout;
1085 if (error) {
1086 error2 = nfs_sigintr(rep->r_nmp, rep, p);
1087 if (error2) {
1088 error = error2;
1089 goto dgramout;
1090 }
1091 }
1092 /* Reconnect for all errors. We may be receiving
1093 * soft/hard/blocking errors because of a network
1094 * change.
1095 * XXX: we should rate limit or delay this
1096 * to once every N attempts or something.
1097 * although TCP doesn't seem to.
1098 */
1099 if (error) {
1100 thread_funnel_switch(NETWORK_FUNNEL,
1101 KERNEL_FUNNEL);
1102 error2 = nfs_sndlock(rep);
1103 if (!error2) {
1104 error2 = nfs_reconnect(rep);
1105 if (error2)
1106 error = error2;
1107 else if (!rep->r_nmp) /* if unmounted then bailout */
1108 error = ENXIO;
1109 else
1110 so = rep->r_nmp->nm_so;
1111 nfs_sndunlock(rep);
1112 } else {
1113 error = error2;
1114 }
1115 thread_funnel_switch(KERNEL_FUNNEL,
1116 NETWORK_FUNNEL);
1c79356b
A
1117 }
1118 } while (error == EWOULDBLOCK);
1119
55e303ae 1120dgramout:
1c79356b
A
1121 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1122 len -= auio.uio_resid;
1123 }
1124shutout:
1125 if (error) {
1126 m_freem(*mp);
1127 *mp = (struct mbuf *)0;
1128 }
1129 return (error);
1130}
1131
1132/*
1133 * Implement receipt of reply on a socket.
1134 * We must search through the list of received datagrams matching them
1135 * with outstanding requests using the xid, until ours is found.
1136 */
1137/* ARGSUSED */
1138int
1139nfs_reply(myrep)
1140 struct nfsreq *myrep;
1141{
1142 register struct nfsreq *rep;
1143 register struct nfsmount *nmp = myrep->r_nmp;
1144 register long t1;
1145 struct mbuf *mrep, *md;
1146 struct mbuf *nam;
1147 u_long rxid, *tl;
1148 caddr_t dpos, cp2;
1149 int error;
1150
1151 /*
1152 * Loop around until we get our own reply
1153 */
1154 for (;;) {
1155 /*
1156 * Lock against other receivers so that I don't get stuck in
1157 * sbwait() after someone else has received my reply for me.
1158 * Also necessary for connection based protocols to avoid
1159 * race conditions during a reconnect.
1160 * If nfs_rcvlock() returns EALREADY, that means that
1161 * the reply has already been recieved by another
1162 * process and we can return immediately. In this
1163 * case, the lock is not taken to avoid races with
1164 * other processes.
1165 */
1166 error = nfs_rcvlock(myrep);
1167 if (error == EALREADY)
1168 return (0);
1169 if (error)
1170 return (error);
0b4e3aa0 1171
1c79356b
A
1172 /*
1173 * If we slept after putting bits otw, then reply may have
1174 * arrived. In which case returning is required, or we
1175 * would hang trying to nfs_receive an already received reply.
1176 */
1177 if (myrep->r_mrep != NULL) {
55e303ae 1178 nfs_rcvunlock(myrep);
fa4905b1 1179 FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
1c79356b
A
1180 return (0);
1181 }
1182 /*
0b4e3aa0 1183 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
fa4905b1 1184 * is still intact by checks done in nfs_rcvlock.
1c79356b 1185 */
e5568f75 1186 /* XXX why do we ask for nam here? we don't use it! */
1c79356b 1187 error = nfs_receive(myrep, &nam, &mrep);
55e303ae
A
1188 if (nam)
1189 m_freem(nam);
1c79356b 1190 /*
0b4e3aa0 1191 * Bailout asap if nfsmount struct gone (unmounted).
1c79356b 1192 */
9bccf70c 1193 if (!myrep->r_nmp || !nmp->nm_so) {
fa4905b1 1194 FSDBG(530, myrep->r_xid, myrep, nmp, -2);
55e303ae 1195 return (ENXIO);
1c79356b
A
1196 }
1197 if (error) {
fa4905b1 1198 FSDBG(530, myrep->r_xid, myrep, nmp, error);
55e303ae 1199 nfs_rcvunlock(myrep);
1c79356b 1200
d12e1678
A
1201 /* Bailout asap if nfsmount struct gone (unmounted). */
1202 if (!myrep->r_nmp || !nmp->nm_so)
55e303ae 1203 return (ENXIO);
d12e1678 1204
1c79356b
A
1205 /*
1206 * Ignore routing errors on connectionless protocols??
1207 */
1208 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
1209 nmp->nm_so->so_error = 0;
1210 if (myrep->r_flags & R_GETONEREP)
1211 return (0);
1212 continue;
1213 }
1214 return (error);
1215 }
1c79356b
A
1216
1217 /*
1218 * We assume all is fine, but if we did not have an error
1219 * and mrep is 0, better not dereference it. nfs_receieve
1220 * calls soreceive which carefully sets error=0 when it got
1221 * errors on sbwait (tsleep). In most cases, I assume that's
1222 * so we could go back again. In tcp case, EPIPE is returned.
1223 * In udp, case nfs_receive gets back here with no error and no
1224 * mrep. Is the right fix to have soreceive check for process
1225 * aborted after sbwait and return something non-zero? Should
1226 * nfs_receive give an EPIPE? Too risky to play with those
1227 * two this late in game for a shutdown problem. Instead,
1228 * just check here and get out. (ekn)
1229 */
1230 if (!mrep) {
fa4905b1 1231 FSDBG(530, myrep->r_xid, myrep, nmp, -3);
55e303ae 1232 return (ENXIO); /* sounds good */
1c79356b
A
1233 }
1234
1235 /*
1236 * Get the xid and check that it is an rpc reply
1237 */
1238 md = mrep;
1239 dpos = mtod(md, caddr_t);
1240 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
1241 rxid = *tl++;
1242 if (*tl != rpc_reply) {
1243#ifndef NFS_NOSERVER
1244 if (nmp->nm_flag & NFSMNT_NQNFS) {
1245 if (nqnfs_callback(nmp, mrep, md, dpos))
1246 nfsstats.rpcinvalid++;
1247 } else {
1248 nfsstats.rpcinvalid++;
1249 m_freem(mrep);
1250 }
1251#else
1252 nfsstats.rpcinvalid++;
1253 m_freem(mrep);
1254#endif
1255nfsmout:
55e303ae
A
1256 if (nmp->nm_state & NFSSTA_RCVLOCK)
1257 nfs_rcvunlock(myrep);
1c79356b
A
1258 if (myrep->r_flags & R_GETONEREP)
1259 return (0); /* this path used by NQNFS */
1260 continue;
1261 }
1262
1263 /*
1264 * Loop through the request list to match up the reply
1265 * Iff no match, just drop the datagram
1266 */
55e303ae 1267 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1c79356b
A
1268 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1269 /* Found it.. */
1270 rep->r_mrep = mrep;
1271 rep->r_md = md;
1272 rep->r_dpos = dpos;
55e303ae
A
1273 /*
1274 * If we're tracking the round trip time
1275 * then we update the circular log here
1276 * with the stats from our current request.
1277 */
1c79356b
A
1278 if (nfsrtton) {
1279 struct rttl *rt;
1280
1281 rt = &nfsrtt.rttl[nfsrtt.pos];
1282 rt->proc = rep->r_procnum;
1283 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
1284 rt->sent = nmp->nm_sent;
1285 rt->cwnd = nmp->nm_cwnd;
1286 if (proct[rep->r_procnum] == 0)
1287 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1288 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
1289 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
1290 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
55e303ae 1291 microtime(&rt->tstamp); // XXX unused
1c79356b
A
1292 if (rep->r_flags & R_TIMING)
1293 rt->rtt = rep->r_rtt;
1294 else
1295 rt->rtt = 1000000;
1296 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
1297 }
1298 /*
1299 * Update congestion window.
1300 * Do the additive increase of
1301 * one rpc/rtt.
1302 */
fa4905b1
A
1303 FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
1304 nmp->nm_cwnd);
1c79356b
A
1305 if (nmp->nm_cwnd <= nmp->nm_sent) {
1306 nmp->nm_cwnd +=
1307 (NFS_CWNDSCALE * NFS_CWNDSCALE +
1308 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
1309 if (nmp->nm_cwnd > NFS_MAXCWND)
1310 nmp->nm_cwnd = NFS_MAXCWND;
1311 }
55e303ae
A
1312 if (rep->r_flags & R_SENT) {
1313 rep->r_flags &= ~R_SENT;
1314 nmp->nm_sent -= NFS_CWNDSCALE;
1315 }
1c79356b
A
1316 /*
1317 * Update rtt using a gain of 0.125 on the mean
1318 * and a gain of 0.25 on the deviation.
1319 */
1320 if (rep->r_flags & R_TIMING) {
1321 /*
1322 * Since the timer resolution of
1323 * NFS_HZ is so course, it can often
1324 * result in r_rtt == 0. Since
1325 * r_rtt == N means that the actual
1326 * rtt is between N+dt and N+2-dt ticks,
1327 * add 1.
1328 */
1329 if (proct[rep->r_procnum] == 0)
1330 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1331 t1 = rep->r_rtt + 1;
1332 t1 -= (NFS_SRTT(rep) >> 3);
1333 NFS_SRTT(rep) += t1;
1334 if (t1 < 0)
1335 t1 = -t1;
1336 t1 -= (NFS_SDRTT(rep) >> 2);
1337 NFS_SDRTT(rep) += t1;
1338 }
1339 nmp->nm_timeouts = 0;
1340 break;
1341 }
1342 }
55e303ae 1343 nfs_rcvunlock(myrep);
1c79356b
A
1344 /*
1345 * If not matched to a request, drop it.
1346 * If it's mine, get out.
1347 */
1348 if (rep == 0) {
1349 nfsstats.rpcunexpected++;
1350 m_freem(mrep);
1351 } else if (rep == myrep) {
1352 if (rep->r_mrep == NULL)
1353 panic("nfs_reply: nil r_mrep");
1354 return (0);
1355 }
fa4905b1
A
1356 FSDBG(530, myrep->r_xid, myrep, rep,
1357 rep ? rep->r_xid : myrep->r_flags);
1c79356b
A
1358 if (myrep->r_flags & R_GETONEREP)
1359 return (0); /* this path used by NQNFS */
1360 }
1361}
1362
1363/*
1364 * nfs_request - goes something like this
1365 * - fill in request struct
1366 * - links it into list
1367 * - calls nfs_send() for first transmit
1368 * - calls nfs_receive() to get reply
1369 * - break down rpc header and return with nfs reply pointed to
1370 * by mrep or error
1371 * nb: always frees up mreq mbuf list
1372 */
1373int
fa4905b1 1374nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
1c79356b
A
1375 struct vnode *vp;
1376 struct mbuf *mrest;
1377 int procnum;
1378 struct proc *procp;
1379 struct ucred *cred;
1380 struct mbuf **mrp;
1381 struct mbuf **mdp;
1382 caddr_t *dposp;
fa4905b1 1383 u_int64_t *xidp;
1c79356b 1384{
55e303ae 1385 register struct mbuf *m, *mrep, *m2;
1c79356b
A
1386 register struct nfsreq *rep, *rp;
1387 register u_long *tl;
1388 register int i;
1389 struct nfsmount *nmp;
1390 struct mbuf *md, *mheadend;
1391 struct nfsnode *np;
1392 char nickv[RPCX_NICKVERF];
1393 time_t reqtime, waituntil;
1394 caddr_t dpos, cp2;
1395 int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
1396 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
1397 int verf_len, verf_type;
1398 u_long xid;
1399 u_quad_t frev;
1400 char *auth_str, *verf_str;
1401 NFSKERBKEY_T key; /* save session key */
55e303ae
A
1402 int nmsotype;
1403 struct timeval now;
1c79356b 1404
e5568f75
A
1405 if (mrp)
1406 *mrp = NULL;
fa4905b1
A
1407 if (xidp)
1408 *xidp = 0;
55e303ae 1409
1c79356b
A
1410 MALLOC_ZONE(rep, struct nfsreq *,
1411 sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1c79356b 1412
55e303ae
A
1413 nmp = VFSTONFS(vp->v_mount);
1414 if (nmp == NULL ||
1415 (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
1416 (NFSSTA_FORCE|NFSSTA_TIMEO)) {
1417 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1418 return (ENXIO);
1c79356b 1419 }
55e303ae
A
1420 nmsotype = nmp->nm_sotype;
1421
1422 FSDBG_TOP(531, vp, procnum, nmp, rep);
1423
1c79356b
A
1424 rep->r_nmp = nmp;
1425 rep->r_vp = vp;
1426 rep->r_procp = procp;
1427 rep->r_procnum = procnum;
55e303ae
A
1428 microuptime(&now);
1429 rep->r_lastmsg = now.tv_sec -
1430 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
1c79356b
A
1431 i = 0;
1432 m = mrest;
1433 while (m) {
1434 i += m->m_len;
1435 m = m->m_next;
1436 }
1437 mrest_len = i;
1438
1439 /*
1440 * Get the RPC header with authorization.
1441 */
1442kerbauth:
55e303ae
A
1443 nmp = VFSTONFS(vp->v_mount);
1444 if (!nmp) {
1445 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1446 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1447 return (ENXIO);
1448 }
1c79356b
A
1449 verf_str = auth_str = (char *)0;
1450 if (nmp->nm_flag & NFSMNT_KERB) {
1451 verf_str = nickv;
1452 verf_len = sizeof (nickv);
1453 auth_type = RPCAUTH_KERB4;
1454 bzero((caddr_t)key, sizeof (key));
1455 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
1456 &auth_len, verf_str, verf_len)) {
55e303ae
A
1457 nmp = VFSTONFS(vp->v_mount);
1458 if (!nmp) {
1459 FSDBG_BOT(531, 2, vp, error, rep);
1460 FREE_ZONE((caddr_t)rep,
1461 sizeof (struct nfsreq), M_NFSREQ);
1462 m_freem(mrest);
1463 return (ENXIO);
1464 }
1c79356b
A
1465 error = nfs_getauth(nmp, rep, cred, &auth_str,
1466 &auth_len, verf_str, &verf_len, key);
55e303ae
A
1467 nmp = VFSTONFS(vp->v_mount);
1468 if (!error && !nmp)
1469 error = ENXIO;
1c79356b 1470 if (error) {
fa4905b1 1471 FSDBG_BOT(531, 2, vp, error, rep);
55e303ae 1472 FREE_ZONE((caddr_t)rep,
1c79356b
A
1473 sizeof (struct nfsreq), M_NFSREQ);
1474 m_freem(mrest);
1475 return (error);
1476 }
1477 }
1478 } else {
1479 auth_type = RPCAUTH_UNIX;
1480 if (cred->cr_ngroups < 1)
1481 panic("nfsreq nogrps");
1482 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1483 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1484 5 * NFSX_UNSIGNED;
1485 }
1486 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1487 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
fa4905b1 1488 if (xidp)
9bccf70c 1489 *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);
1c79356b
A
1490 if (auth_str)
1491 _FREE(auth_str, M_TEMP);
1492
1493 /*
1494 * For stream protocols, insert a Sun RPC Record Mark.
1495 */
55e303ae 1496 if (nmsotype == SOCK_STREAM) {
1c79356b
A
1497 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1498 *mtod(m, u_long *) = htonl(0x80000000 |
1499 (m->m_pkthdr.len - NFSX_UNSIGNED));
1500 }
1501 rep->r_mreq = m;
1502 rep->r_xid = xid;
1503tryagain:
55e303ae
A
1504 nmp = VFSTONFS(vp->v_mount);
1505 if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
1c79356b
A
1506 rep->r_retry = nmp->nm_retry;
1507 else
1508 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1509 rep->r_rtt = rep->r_rexmit = 0;
1510 if (proct[procnum] > 0)
1511 rep->r_flags = R_TIMING;
1512 else
1513 rep->r_flags = 0;
1514 rep->r_mrep = NULL;
1515
1516 /*
1517 * Do the client side RPC.
1518 */
1519 nfsstats.rpcrequests++;
1520 /*
1521 * Chain request into list of outstanding requests. Be sure
1522 * to put it LAST so timer finds oldest requests first.
1523 */
1524 s = splsoftclock();
1525 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1526
1527 /* Get send time for nqnfs */
55e303ae
A
1528 microtime(&now);
1529 reqtime = now.tv_sec;
1c79356b
A
1530
1531 /*
1532 * If backing off another request or avoiding congestion, don't
1533 * send this one now but let timer do it. If not timing a request,
1534 * do it now.
1535 */
55e303ae 1536 if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1c79356b
A
1537 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1538 nmp->nm_sent < nmp->nm_cwnd)) {
55e303ae
A
1539 int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);
1540
1c79356b 1541 splx(s);
55e303ae
A
1542 if (connrequired)
1543 error = nfs_sndlock(rep);
1c79356b
A
1544
1545 /*
1546 * Set the R_SENT before doing the send in case another thread
1547 * processes the reply before the nfs_send returns here
1548 */
1549 if (!error) {
1550 if ((rep->r_flags & R_MUSTRESEND) == 0) {
fa4905b1
A
1551 FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
1552 nmp->nm_cwnd);
1c79356b
A
1553 nmp->nm_sent += NFS_CWNDSCALE;
1554 rep->r_flags |= R_SENT;
1555 }
1556
55e303ae
A
1557 m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
1558 error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
1559 if (connrequired)
1560 nfs_sndunlock(rep);
1c79356b 1561 }
55e303ae 1562 nmp = VFSTONFS(vp->v_mount);
1c79356b 1563 if (error) {
55e303ae
A
1564 if (nmp)
1565 nmp->nm_sent -= NFS_CWNDSCALE;
1c79356b
A
1566 rep->r_flags &= ~R_SENT;
1567 }
1568 } else {
1569 splx(s);
1570 rep->r_rtt = -1;
1571 }
1572
1573 /*
1574 * Wait for the reply from our send or the timer's.
1575 */
1576 if (!error || error == EPIPE)
1577 error = nfs_reply(rep);
1578
1579 /*
1580 * RPC done, unlink the request.
1581 */
55e303ae
A
1582 nfs_repdequeue(rep);
1583
1584 nmp = VFSTONFS(vp->v_mount);
1c79356b
A
1585
1586 /*
1587 * Decrement the outstanding request count.
1588 */
1589 if (rep->r_flags & R_SENT) {
1c79356b 1590 rep->r_flags &= ~R_SENT; /* paranoia */
55e303ae
A
1591 if (nmp) {
1592 FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
1593 nmp->nm_sent -= NFS_CWNDSCALE;
1594 }
1c79356b
A
1595 }
1596
1597 /*
1598 * If there was a successful reply and a tprintf msg.
1599 * tprintf a response.
1600 */
e5568f75
A
1601 if (!error)
1602 nfs_up(rep, nmp, procp, "is alive again", NFSSTA_TIMEO);
1c79356b
A
1603 mrep = rep->r_mrep;
1604 md = rep->r_md;
1605 dpos = rep->r_dpos;
55e303ae
A
1606 if (!error && !nmp)
1607 error = ENXIO;
1c79356b
A
1608 if (error) {
1609 m_freem(rep->r_mreq);
fa4905b1 1610 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
55e303ae 1611 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1c79356b
A
1612 return (error);
1613 }
1614
1615 /*
1616 * break down the rpc header and check if ok
1617 */
1618 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1619 if (*tl++ == rpc_msgdenied) {
1620 if (*tl == rpc_mismatch)
1621 error = EOPNOTSUPP;
1622 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1623 if (!failed_auth) {
1624 failed_auth++;
1625 mheadend->m_next = (struct mbuf *)0;
1626 m_freem(mrep);
1627 m_freem(rep->r_mreq);
1628 goto kerbauth;
1629 } else
1630 error = EAUTH;
1631 } else
1632 error = EACCES;
1633 m_freem(mrep);
1634 m_freem(rep->r_mreq);
fa4905b1 1635 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
55e303ae 1636 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1c79356b
A
1637 return (error);
1638 }
1639
1640 /*
1641 * Grab any Kerberos verifier, otherwise just throw it away.
1642 */
1643 verf_type = fxdr_unsigned(int, *tl++);
1644 i = fxdr_unsigned(int, *tl);
1645 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1646 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1647 if (error)
1648 goto nfsmout;
1649 } else if (i > 0)
1650 nfsm_adv(nfsm_rndup(i));
1651 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1652 /* 0 == ok */
1653 if (*tl == 0) {
1654 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1655 if (*tl != 0) {
1656 error = fxdr_unsigned(int, *tl);
1657 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1658 error == NFSERR_TRYLATER) {
1659 m_freem(mrep);
1660 error = 0;
55e303ae
A
1661 microuptime(&now);
1662 waituntil = now.tv_sec + trylater_delay;
1c79356b
A
1663 NFS_DPF(DUP,
1664 ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
1665 nmp->nm_mountp->mnt_stat.f_mntfromname,
1666 nmp->nm_flag, trylater_cnt, waituntil,
1667 trylater_delay));
55e303ae 1668 while (now.tv_sec < waituntil) {
1c79356b
A
1669 (void)tsleep((caddr_t)&lbolt,
1670 PSOCK, "nqnfstry", 0);
55e303ae
A
1671 microuptime(&now);
1672 }
1673 trylater_delay *= 2;
1674 if (trylater_delay > 60)
1675 trylater_delay = 60;
1c79356b
A
1676 if (trylater_cnt < 7)
1677 trylater_cnt++;
1678 goto tryagain;
1679 }
1680
1681 /*
1682 * If the File Handle was stale, invalidate the
1683 * lookup cache, just in case.
1684 */
1685 if (error == ESTALE)
1686 cache_purge(vp);
1687 if (nmp->nm_flag & NFSMNT_NFSV3) {
1688 *mrp = mrep;
1689 *mdp = md;
1690 *dposp = dpos;
1691 error |= NFSERR_RETERR;
e5568f75 1692 } else {
1c79356b 1693 m_freem(mrep);
e5568f75
A
1694 error &= ~NFSERR_RETERR;
1695 }
1c79356b 1696 m_freem(rep->r_mreq);
fa4905b1 1697 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
55e303ae 1698 FREE_ZONE((caddr_t)rep,
1c79356b
A
1699 sizeof (struct nfsreq), M_NFSREQ);
1700 return (error);
1701 }
1702
1703 /*
1704 * For nqnfs, get any lease in reply
1705 */
1706 if (nmp->nm_flag & NFSMNT_NQNFS) {
1707 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1708 if (*tl) {
1709 np = VTONFS(vp);
1710 nqlflag = fxdr_unsigned(int, *tl);
1711 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
1712 cachable = fxdr_unsigned(int, *tl++);
1713 reqtime += fxdr_unsigned(int, *tl++);
55e303ae
A
1714 microtime(&now);
1715 if (reqtime > now.tv_sec) {
1c79356b
A
1716 fxdr_hyper(tl, &frev);
1717 nqnfs_clientlease(nmp, np, nqlflag,
1718 cachable, reqtime, frev);
1719 }
1720 }
1721 }
1722 *mrp = mrep;
1723 *mdp = md;
1724 *dposp = dpos;
1725 m_freem(rep->r_mreq);
fa4905b1 1726 FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
1c79356b
A
1727 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1728 return (0);
1729 }
1730 m_freem(mrep);
1731 error = EPROTONOSUPPORT;
1732nfsmout:
1733 m_freem(rep->r_mreq);
fa4905b1 1734 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
55e303ae 1735 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1c79356b
A
1736 return (error);
1737}
1738
1739#ifndef NFS_NOSERVER
1740/*
1741 * Generate the rpc reply header
1742 * siz arg. is used to decide if adding a cluster is worthwhile
1743 */
1744int
1745nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1746 int siz;
1747 struct nfsrv_descript *nd;
1748 struct nfssvc_sock *slp;
1749 int err;
1750 int cache;
1751 u_quad_t *frev;
1752 struct mbuf **mrq;
1753 struct mbuf **mbp;
1754 caddr_t *bposp;
1755{
1756 register u_long *tl;
1757 register struct mbuf *mreq;
1758 caddr_t bpos;
1759 struct mbuf *mb, *mb2;
1760
1761 MGETHDR(mreq, M_WAIT, MT_DATA);
1762 mb = mreq;
1763 /*
1764 * If this is a big reply, use a cluster else
1765 * try and leave leading space for the lower level headers.
1766 */
1767 siz += RPC_REPLYSIZ;
1768 if (siz >= MINCLSIZE) {
1769 MCLGET(mreq, M_WAIT);
1770 } else
1771 mreq->m_data += max_hdr;
1772 tl = mtod(mreq, u_long *);
1773 mreq->m_len = 6 * NFSX_UNSIGNED;
1774 bpos = ((caddr_t)tl) + mreq->m_len;
1775 *tl++ = txdr_unsigned(nd->nd_retxid);
1776 *tl++ = rpc_reply;
1777 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1778 *tl++ = rpc_msgdenied;
1779 if (err & NFSERR_AUTHERR) {
1780 *tl++ = rpc_autherr;
1781 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1782 mreq->m_len -= NFSX_UNSIGNED;
1783 bpos -= NFSX_UNSIGNED;
1784 } else {
1785 *tl++ = rpc_mismatch;
1786 *tl++ = txdr_unsigned(RPC_VER2);
1787 *tl = txdr_unsigned(RPC_VER2);
1788 }
1789 } else {
1790 *tl++ = rpc_msgaccepted;
1791
1792 /*
1793 * For Kerberos authentication, we must send the nickname
1794 * verifier back, otherwise just RPCAUTH_NULL.
1795 */
1796 if (nd->nd_flag & ND_KERBFULL) {
1797 register struct nfsuid *nuidp;
1798 struct timeval ktvin, ktvout;
1799
1800 for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1801 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1802 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1803 (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1804 &nuidp->nu_haddr, nd->nd_nam2)))
1805 break;
1806 }
1807 if (nuidp) {
1808 ktvin.tv_sec =
1809 txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1810 ktvin.tv_usec =
1811 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1812
1813 /*
1814 * Encrypt the timestamp in ecb mode using the
1815 * session key.
1816 */
1817#if NFSKERB
1818 XXX
1819#endif
1820
1821 *tl++ = rpc_auth_kerb;
1822 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1823 *tl = ktvout.tv_sec;
1824 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1825 *tl++ = ktvout.tv_usec;
1826 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1827 } else {
1828 *tl++ = 0;
1829 *tl++ = 0;
1830 }
1831 } else {
1832 *tl++ = 0;
1833 *tl++ = 0;
1834 }
1835 switch (err) {
1836 case EPROGUNAVAIL:
1837 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1838 break;
1839 case EPROGMISMATCH:
1840 *tl = txdr_unsigned(RPC_PROGMISMATCH);
1841 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1842 if (nd->nd_flag & ND_NQNFS) {
1843 *tl++ = txdr_unsigned(3);
1844 *tl = txdr_unsigned(3);
1845 } else {
1846 *tl++ = txdr_unsigned(2);
1847 *tl = txdr_unsigned(3);
1848 }
1849 break;
1850 case EPROCUNAVAIL:
1851 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1852 break;
1853 case EBADRPC:
1854 *tl = txdr_unsigned(RPC_GARBAGE);
1855 break;
1856 default:
1857 *tl = 0;
1858 if (err != NFSERR_RETVOID) {
1859 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1860 if (err)
1861 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1862 else
1863 *tl = 0;
1864 }
1865 break;
1866 };
1867 }
1868
1869 /*
1870 * For nqnfs, piggyback lease as requested.
1871 */
1872 if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1873 if (nd->nd_flag & ND_LEASE) {
1874 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1875 *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1876 *tl++ = txdr_unsigned(cache);
1877 *tl++ = txdr_unsigned(nd->nd_duration);
1878 txdr_hyper(frev, tl);
1879 } else {
1880 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1881 *tl = 0;
1882 }
1883 }
1884 if (mrq != NULL)
1885 *mrq = mreq;
1886 *mbp = mb;
1887 *bposp = bpos;
1888 if (err != 0 && err != NFSERR_RETVOID)
1889 nfsstats.srvrpc_errs++;
1890 return (0);
1891}
1892
1893
1894#endif /* NFS_NOSERVER */
1895
1896
1897/*
1898 * From FreeBSD 1.58, a Matt Dillon fix...
1899 * Flag a request as being about to terminate.
1900 * The nm_sent count is decremented now to avoid deadlocks when the process
1901 * in soreceive() hasn't yet managed to send its own request.
1902 */
1903static void
1904nfs_softterm(struct nfsreq *rep)
1905{
55e303ae 1906
1c79356b
A
1907 rep->r_flags |= R_SOFTTERM;
1908 if (rep->r_flags & R_SENT) {
fa4905b1
A
1909 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1910 rep->r_nmp->nm_cwnd);
1c79356b
A
1911 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1912 rep->r_flags &= ~R_SENT;
1913 }
1914}
1915
1916void
1917nfs_timer_funnel(arg)
1918 void * arg;
1919{
1920 (void) thread_funnel_set(kernel_flock, TRUE);
1921 nfs_timer(arg);
1922 (void) thread_funnel_set(kernel_flock, FALSE);
1923
1924}
1925
55e303ae
A
1926/*
1927 * Ensure rep isn't in use by the timer, then dequeue it.
1928 */
1929void
1930nfs_repdequeue(struct nfsreq *rep)
1931{
1932 int s;
1933
1934 while ((rep->r_flags & R_BUSY)) {
1935 rep->r_flags |= R_WAITING;
1936 tsleep(rep, PSOCK, "repdeq", 0);
1937 }
1938 s = splsoftclock();
1939 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1940 splx(s);
1941}
1942
1943/*
1944 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
1945 * free()'d out from under it.
1946 */
1947void
1948nfs_repbusy(struct nfsreq *rep)
1949{
1950
1951 if ((rep->r_flags & R_BUSY))
1952 panic("rep locked");
1953 rep->r_flags |= R_BUSY;
1954}
1955
1956/*
1957 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
1958 */
1959struct nfsreq *
1960nfs_repnext(struct nfsreq *rep)
1961{
1962 struct nfsreq * nextrep;
1963
1964 if (rep == NULL)
1965 return (NULL);
1966 /*
1967 * We need to get and busy the next req before signalling the
1968 * current one, otherwise wakeup() may block us and we'll race to
1969 * grab the next req.
1970 */
1971 nextrep = TAILQ_NEXT(rep, r_chain);
1972 if (nextrep != NULL)
1973 nfs_repbusy(nextrep);
1974 /* unbusy and signal. */
1975 rep->r_flags &= ~R_BUSY;
1976 if ((rep->r_flags & R_WAITING)) {
1977 rep->r_flags &= ~R_WAITING;
1978 wakeup(rep);
1979 }
1980 return (nextrep);
1981}
1982
1c79356b
A
1983/*
1984 * Nfs timer routine
1985 * Scan the nfsreq list and retransmit any requests that have timed out
1986 * To avoid retransmission attempts on STREAM sockets (in the future) make
1987 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1988 */
1989void
1990nfs_timer(arg)
1991 void *arg; /* never used */
1992{
55e303ae 1993 register struct nfsreq *rep;
1c79356b
A
1994 register struct mbuf *m;
1995 register struct socket *so;
1996 register struct nfsmount *nmp;
1997 register int timeo;
1998 int s, error;
1999#ifndef NFS_NOSERVER
2000 static long lasttime = 0;
2001 register struct nfssvc_sock *slp;
2002 u_quad_t cur_usec;
2003#endif /* NFS_NOSERVER */
2004#if NFSDIAG
2005 int rttdiag;
2006#endif
2007 int flags, rexmit, cwnd, sent;
2008 u_long xid;
55e303ae 2009 struct timeval now;
1c79356b
A
2010
2011 s = splnet();
2012 /*
2013 * XXX If preemptable threads are implemented the spls used for the
2014 * outstanding request queue must be replaced with mutexes.
2015 */
1c79356b
A
2016#ifdef NFSTRACESUSPENDERS
2017 if (NFSTRACE_SUSPENDING) {
55e303ae 2018 TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
1c79356b
A
2019 if (rep->r_xid == nfstracexid)
2020 break;
2021 if (!rep) {
2022 NFSTRACE_RESUME;
2023 } else if (NFSTRACE_SUSPENSEOVER) {
2024 NFSTRACE_SUSPEND;
2025 }
2026 }
2027#endif
55e303ae
A
2028 rep = TAILQ_FIRST(&nfs_reqq);
2029 if (rep != NULL)
2030 nfs_repbusy(rep);
2031 microuptime(&now);
2032 for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
1c79356b
A
2033#ifdef NFSTRACESUSPENDERS
2034 if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
2035 nfstracexid = rep->r_xid;
2036 NFSTRACE_STARTSUSPENDCOUNTDOWN;
2037 }
2038#endif
2039 nmp = rep->r_nmp;
2040 if (!nmp) /* unmounted */
2041 continue;
2042 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
2043 continue;
55e303ae 2044 if (nfs_sigintr(nmp, rep, rep->r_procp))
1c79356b 2045 continue;
55e303ae
A
2046 if (nmp->nm_tprintf_initial_delay != 0 &&
2047 (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
2048 rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
2049 rep->r_lastmsg = now.tv_sec;
e5568f75
A
2050 nfs_down(rep, rep->r_nmp, rep->r_procp, "not responding",
2051 0, NFSSTA_TIMEO);
4a249263
A
2052 if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
2053 /* we're not yet completely mounted and */
2054 /* we can't complete an RPC, so we fail */
2055 nfsstats.rpctimeouts++;
2056 nfs_softterm(rep);
2057 continue;
2058 }
1c79356b
A
2059 }
2060 if (rep->r_rtt >= 0) {
2061 rep->r_rtt++;
2062 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
2063 timeo = nmp->nm_timeo;
2064 else
2065 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
2066 /* ensure 62.5 ms floor */
2067 while (16 * timeo < hz)
2068 timeo *= 2;
2069 if (nmp->nm_timeouts > 0)
2070 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
2071 if (rep->r_rtt <= timeo)
2072 continue;
2073 if (nmp->nm_timeouts < 8)
2074 nmp->nm_timeouts++;
2075 }
2076 /*
55e303ae
A
2077 * Check for too many retransmits. This is never true for
2078 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
2079 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
1c79356b 2080 */
1c79356b
A
2081 if (rep->r_rexmit >= rep->r_retry) { /* too many */
2082 nfsstats.rpctimeouts++;
2083 nfs_softterm(rep);
2084 continue;
2085 }
2086 if (nmp->nm_sotype != SOCK_DGRAM) {
2087 if (++rep->r_rexmit > NFS_MAXREXMIT)
2088 rep->r_rexmit = NFS_MAXREXMIT;
2089 continue;
2090 }
2091 if ((so = nmp->nm_so) == NULL)
2092 continue;
2093
2094 /*
2095 * If there is enough space and the window allows..
2096 * Resend it
2097 * Set r_rtt to -1 in case we fail to send it now.
2098 */
2099#if NFSDIAG
2100 rttdiag = rep->r_rtt;
2101#endif
2102 rep->r_rtt = -1;
2103 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
2104 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
2105 (rep->r_flags & R_SENT) ||
2106 nmp->nm_sent < nmp->nm_cwnd) &&
2107 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
2108
2109 struct proc *p = current_proc();
2110
2111#if NFSDIAG
2112 if (rep->r_flags & R_SENT && nfsprnttimo &&
2113 nmp->nm_timeouts >= nfsprnttimo) {
2114 int t = proct[rep->r_procnum];
2115 if (t)
2116 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1]));
2117 else
2118 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum));
2119 }
2120 nfsdup(rep);
2121#endif /* NFSDIAG */
2122 /*
2123 * Iff first send, start timing
2124 * else turn timing off, backoff timer
2125 * and divide congestion window by 2.
2126 * We update these *before* the send to avoid
2127 * racing against receiving the reply.
2128 * We save them so we can restore them on send error.
2129 */
2130 flags = rep->r_flags;
2131 rexmit = rep->r_rexmit;
2132 cwnd = nmp->nm_cwnd;
2133 sent = nmp->nm_sent;
2134 xid = rep->r_xid;
2135 if (rep->r_flags & R_SENT) {
2136 rep->r_flags &= ~R_TIMING;
2137 if (++rep->r_rexmit > NFS_MAXREXMIT)
2138 rep->r_rexmit = NFS_MAXREXMIT;
2139 nmp->nm_cwnd >>= 1;
2140 if (nmp->nm_cwnd < NFS_CWNDSCALE)
2141 nmp->nm_cwnd = NFS_CWNDSCALE;
2142 nfsstats.rpcretries++;
2143 } else {
2144 rep->r_flags |= R_SENT;
2145 nmp->nm_sent += NFS_CWNDSCALE;
2146 }
fa4905b1 2147 FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
1c79356b
A
2148
2149 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2150
2151 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
2152 error = (*so->so_proto->pr_usrreqs->pru_send)
2153 (so, 0, m, 0, 0, p);
2154 else
2155 error = (*so->so_proto->pr_usrreqs->pru_send)
2156 (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p);
2157
2158 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2159
fa4905b1 2160 FSDBG(535, xid, error, sent, cwnd);
1c79356b
A
2161
2162 if (error) {
2163 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
2164 so->so_error = 0;
55e303ae 2165 rep->r_flags = flags | R_RESENDERR;
1c79356b
A
2166 rep->r_rexmit = rexmit;
2167 nmp->nm_cwnd = cwnd;
2168 nmp->nm_sent = sent;
2169 if (flags & R_SENT)
2170 nfsstats.rpcretries--;
2171 } else
2172 rep->r_rtt = 0;
2173 }
2174 }
2175#ifndef NFS_NOSERVER
2176 /*
2177 * Call the nqnfs server timer once a second to handle leases.
2178 */
55e303ae
A
2179 microuptime(&now);
2180 if (lasttime != now.tv_sec) {
2181 lasttime = now.tv_sec;
1c79356b
A
2182 nqnfs_serverd();
2183 }
2184
2185 /*
2186 * Scan the write gathering queues for writes that need to be
2187 * completed now.
2188 */
55e303ae
A
2189 cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
2190 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
2191 if (LIST_FIRST(&slp->ns_tq) &&
2192 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
1c79356b
A
2193 nfsrv_wakenfsd(slp);
2194 }
2195#endif /* NFS_NOSERVER */
2196 splx(s);
2197 timeout(nfs_timer_funnel, (void *)0, nfs_ticks);
2198
2199}
2200
2201
2202/*
2203 * Test for a termination condition pending on the process.
55e303ae
A
2204 * This is used to determine if we need to bail on a mount.
2205 * EIO is returned if there has been a soft timeout.
2206 * EINTR is returned if there is a signal pending that is not being ignored
2207 * and the mount is interruptable, or if we are a thread that is in the process
2208 * of cancellation (also SIGKILL posted).
1c79356b
A
2209 */
2210int
2211nfs_sigintr(nmp, rep, p)
2212 struct nfsmount *nmp;
2213 struct nfsreq *rep;
55e303ae 2214 struct proc *p;
1c79356b 2215{
55e303ae
A
2216 struct uthread *curr_td;
2217 sigset_t pending_sigs;
2218 int context_good = 0;
2219 struct nfsmount *repnmp;
2220
2221 if (nmp == NULL)
2222 return (ENXIO);
2223 if (rep != NULL) {
2224 repnmp = rep->r_nmp;
2225 /* we've had a forced unmount. */
2226 if (repnmp == NULL)
2227 return (ENXIO);
2228 /* request has timed out on a 'soft' mount. */
2229 if (rep->r_flags & R_SOFTTERM)
2230 return (EIO);
2231 /*
2232 * We're in the progress of a force unmount and there's
2233 * been a timeout we're dead and fail IO.
2234 */
2235 if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
2236 (NFSSTA_FORCE|NFSSTA_TIMEO))
2237 return (EIO);
2238 /* Someone is unmounting us, go soft and mark it. */
2239 if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
2240 repnmp->nm_flag |= NFSMNT_SOFT;
2241 nmp->nm_state |= NFSSTA_FORCE;
2242 }
2243 /*
2244 * If the mount is hung and we've requested not to hang
2245 * on remote filesystems, then bail now.
2246 */
2247 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
2248 (repnmp->nm_state & NFSSTA_TIMEO) != 0)
2249 return (EIO);
2250 }
2251 /* XXX: is this valid? this probably should be an assertion. */
2252 if (p == NULL)
2253 return (0);
1c79356b 2254
55e303ae
A
2255 /*
2256 * XXX: Since nfs doesn't have a good shot at getting the current
2257 * thread we take a guess. (only struct proc * are passed to VOPs)
2258 * What we do is look at the current thread, if it belongs to the
2259 * passed in proc pointer then we have a "good/accurate" context
2260 * and can make an accurate guess as to what to do.
2261 * However if we have a bad context we have to make due with what
2262 * is in the proc struct which may not be as up to date as we'd
2263 * like.
2264 * This is ok because the process will call us with the correct
2265 * context after a short timeout while waiting for a response.
2266 */
2267 curr_td = (struct uthread *)get_bsdthread_info(current_act());
2268 if (curr_td->uu_proc == p)
2269 context_good = 1;
2270 if (context_good && current_thread_aborted())
1c79356b 2271 return (EINTR);
55e303ae
A
2272 /* mask off thread and process blocked signals. */
2273 if (context_good)
2274 pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
2275 else
2276 pending_sigs = p->p_siglist;
2277 /* mask off process level and NFS ignored signals. */
2278 pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
2279 if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
1c79356b
A
2280 return (EINTR);
2281 return (0);
2282}
2283
2284/*
2285 * Lock a socket against others.
2286 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
2287 * and also to avoid race conditions between the processes with nfs requests
2288 * in progress when a reconnect is necessary.
2289 */
2290int
55e303ae 2291nfs_sndlock(rep)
1c79356b
A
2292 struct nfsreq *rep;
2293{
55e303ae 2294 register int *statep;
1c79356b 2295 struct proc *p;
55e303ae 2296 int error, slpflag = 0, slptimeo = 0;
1c79356b 2297
55e303ae
A
2298 if (rep->r_nmp == NULL)
2299 return (ENXIO);
2300 statep = &rep->r_nmp->nm_state;
2301
2302 p = rep->r_procp;
2303 if (rep->r_nmp->nm_flag & NFSMNT_INT)
2304 slpflag = PCATCH;
2305 while (*statep & NFSSTA_SNDLOCK) {
2306 error = nfs_sigintr(rep->r_nmp, rep, p);
2307 if (error)
2308 return (error);
2309 *statep |= NFSSTA_WANTSND;
2310 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
2311 slptimeo = hz;
2312 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2313 "nfsndlck", slptimeo);
1c79356b
A
2314 if (slpflag == PCATCH) {
2315 slpflag = 0;
2316 slptimeo = 2 * hz;
2317 }
0b4e3aa0
A
2318 /*
2319 * Make sure while we slept that the mountpoint didn't go away.
2320 * nfs_sigintr and callers expect it in tact.
2321 */
2322 if (!rep->r_nmp)
55e303ae 2323 return (ENXIO); /* don't have lock until out of loop */
1c79356b 2324 }
55e303ae 2325 *statep |= NFSSTA_SNDLOCK;
1c79356b
A
2326 return (0);
2327}
2328
2329/*
2330 * Unlock the stream socket for others.
2331 */
2332void
55e303ae
A
2333nfs_sndunlock(rep)
2334 struct nfsreq *rep;
1c79356b 2335{
55e303ae 2336 register int *statep;
1c79356b 2337
55e303ae
A
2338 if (rep->r_nmp == NULL)
2339 return;
2340 statep = &rep->r_nmp->nm_state;
2341 if ((*statep & NFSSTA_SNDLOCK) == 0)
1c79356b 2342 panic("nfs sndunlock");
55e303ae
A
2343 *statep &= ~NFSSTA_SNDLOCK;
2344 if (*statep & NFSSTA_WANTSND) {
2345 *statep &= ~NFSSTA_WANTSND;
2346 wakeup((caddr_t)statep);
1c79356b
A
2347 }
2348}
2349
2350static int
2351nfs_rcvlock(rep)
2352 register struct nfsreq *rep;
2353{
55e303ae
A
2354 register int *statep;
2355 int error, slpflag, slptimeo = 0;
1c79356b 2356
d12e1678
A
2357 /* make sure we still have our mountpoint */
2358 if (!rep->r_nmp) {
2359 if (rep->r_mrep != NULL)
2360 return (EALREADY);
55e303ae 2361 return (ENXIO);
d12e1678
A
2362 }
2363
55e303ae
A
2364 statep = &rep->r_nmp->nm_state;
2365 FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
2366 if (rep->r_nmp->nm_flag & NFSMNT_INT)
1c79356b
A
2367 slpflag = PCATCH;
2368 else
2369 slpflag = 0;
55e303ae
A
2370 while (*statep & NFSSTA_RCVLOCK) {
2371 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
fa4905b1 2372 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
55e303ae 2373 return (error);
1c79356b
A
2374 } else if (rep->r_mrep != NULL) {
2375 /*
2376 * Don't bother sleeping if reply already arrived
2377 */
fa4905b1 2378 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
1c79356b
A
2379 return (EALREADY);
2380 }
fa4905b1 2381 FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
55e303ae
A
2382 *statep |= NFSSTA_WANTRCV;
2383 /*
2384 * We need to poll if we're P_NOREMOTEHANG so that we
2385 * call nfs_sigintr periodically above.
2386 */
2387 if (rep->r_procp != NULL &&
2388 (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
2389 slptimeo = hz;
2390 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2391 "nfsrcvlk", slptimeo);
1c79356b
A
2392 if (slpflag == PCATCH) {
2393 slpflag = 0;
2394 slptimeo = 2 * hz;
2395 }
0b4e3aa0
A
2396 /*
2397 * Make sure while we slept that the mountpoint didn't go away.
fa4905b1 2398 * nfs_sigintr and caller nfs_reply expect it intact.
0b4e3aa0 2399 */
fa4905b1
A
2400 if (!rep->r_nmp) {
2401 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
55e303ae 2402 return (ENXIO); /* don't have lock until out of loop */
fa4905b1 2403 }
1c79356b
A
2404 }
2405 /*
2406 * nfs_reply will handle it if reply already arrived.
2407 * (We may have slept or been preempted while on network funnel).
2408 */
55e303ae
A
2409 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
2410 *statep |= NFSSTA_RCVLOCK;
1c79356b
A
2411 return (0);
2412}
2413
2414/*
2415 * Unlock the stream socket for others.
2416 */
2417static void
55e303ae
A
2418nfs_rcvunlock(rep)
2419 register struct nfsreq *rep;
1c79356b 2420{
55e303ae
A
2421 register int *statep;
2422
2423 if (rep->r_nmp == NULL)
2424 return;
2425 statep = &rep->r_nmp->nm_state;
1c79356b 2426
55e303ae
A
2427 FSDBG(533, statep, *statep, 0, 0);
2428 if ((*statep & NFSSTA_RCVLOCK) == 0)
1c79356b 2429 panic("nfs rcvunlock");
55e303ae
A
2430 *statep &= ~NFSSTA_RCVLOCK;
2431 if (*statep & NFSSTA_WANTRCV) {
2432 *statep &= ~NFSSTA_WANTRCV;
2433 wakeup((caddr_t)statep);
1c79356b
A
2434 }
2435}
2436
2437
2438#ifndef NFS_NOSERVER
2439/*
2440 * Socket upcall routine for the nfsd sockets.
2441 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2442 * Essentially do as much as possible non-blocking, else punt and it will
2443 * be called with M_WAIT from an nfsd.
2444 */
2445 /*
55e303ae 2446 * Needs to run under network funnel
1c79356b
A
2447 */
2448void
2449nfsrv_rcv(so, arg, waitflag)
2450 struct socket *so;
2451 caddr_t arg;
2452 int waitflag;
2453{
2454 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2455 register struct mbuf *m;
2456 struct mbuf *mp, *mhck;
4a249263 2457 struct sockaddr *nam;
1c79356b 2458 struct uio auio;
55e303ae 2459 int flags, ns_nflag=0, error;
1c79356b
A
2460 struct sockaddr_in *sin;
2461
2462 if ((slp->ns_flag & SLP_VALID) == 0)
2463 return;
2464#ifdef notdef
2465 /*
2466 * Define this to test for nfsds handling this under heavy load.
2467 */
2468 if (waitflag == M_DONTWAIT) {
55e303ae
A
2469 ns_nflag = SLPN_NEEDQ;
2470 goto dorecs;
1c79356b
A
2471 }
2472#endif
2473 auio.uio_procp = NULL;
2474 if (so->so_type == SOCK_STREAM) {
2475 /*
2476 * If there are already records on the queue, defer soreceive()
2477 * to an nfsd so that there is feedback to the TCP layer that
2478 * the nfs servers are heavily loaded.
2479 */
2480 if (slp->ns_rec && waitflag == M_DONTWAIT) {
55e303ae 2481 ns_nflag = SLPN_NEEDQ;
1c79356b
A
2482 goto dorecs;
2483 }
2484
2485 /*
2486 * Do soreceive().
2487 */
2488 auio.uio_resid = 1000000000;
2489 flags = MSG_DONTWAIT;
2490 error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
2491 if (error || mp == (struct mbuf *)0) {
2492 if (error == EWOULDBLOCK)
55e303ae 2493 ns_nflag = SLPN_NEEDQ;
1c79356b 2494 else
55e303ae 2495 ns_nflag = SLPN_DISCONN;
1c79356b
A
2496 goto dorecs;
2497 }
2498 m = mp;
2499 if (slp->ns_rawend) {
2500 slp->ns_rawend->m_next = m;
2501 slp->ns_cc += 1000000000 - auio.uio_resid;
2502 } else {
2503 slp->ns_raw = m;
2504 slp->ns_cc = 1000000000 - auio.uio_resid;
2505 }
2506 while (m->m_next)
2507 m = m->m_next;
2508 slp->ns_rawend = m;
2509
2510 /*
2511 * Now try and parse record(s) out of the raw stream data.
2512 */
2513 error = nfsrv_getstream(slp, waitflag);
2514 if (error) {
2515 if (error == EPERM)
55e303ae 2516 ns_nflag = SLPN_DISCONN;
1c79356b 2517 else
55e303ae 2518 ns_nflag = SLPN_NEEDQ;
1c79356b
A
2519 }
2520 } else {
2521 do {
2522 auio.uio_resid = 1000000000;
4a249263 2523 flags = MSG_DONTWAIT | MSG_NEEDSA;
1c79356b 2524 nam = 0;
4a249263 2525 mp = 0;
1c79356b
A
2526 error = soreceive(so, &nam, &auio, &mp,
2527 (struct mbuf **)0, &flags);
2528
2529 if (mp) {
2530 if (nam) {
2531 MGET(mhck, M_WAIT, MT_SONAME);
2532 mhck->m_len = nam->sa_len;
2533 sin = mtod(mhck, struct sockaddr_in *);
2534 bcopy(nam, sin, sizeof(struct sockaddr_in));
2535 mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
1c79356b
A
2536
2537 m = mhck;
2538 m->m_next = mp;
2539 } else
2540 m = mp;
2541 if (slp->ns_recend)
2542 slp->ns_recend->m_nextpkt = m;
2543 else
2544 slp->ns_rec = m;
2545 slp->ns_recend = m;
2546 m->m_nextpkt = (struct mbuf *)0;
2547 }
4a249263
A
2548 if (nam) {
2549 FREE(nam, M_SONAME);
2550 }
1c79356b
A
2551 if (error) {
2552 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2553 && error != EWOULDBLOCK) {
55e303ae 2554 ns_nflag = SLPN_DISCONN;
1c79356b
A
2555 goto dorecs;
2556 }
2557 }
2558 } while (mp);
2559 }
2560
2561 /*
2562 * Now try and process the request records, non-blocking.
2563 */
2564dorecs:
55e303ae
A
2565 if (ns_nflag)
2566 slp->ns_nflag |= ns_nflag;
1c79356b 2567 if (waitflag == M_DONTWAIT &&
55e303ae 2568 (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
1c79356b
A
2569 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2570 nfsrv_wakenfsd(slp);
2571 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2572 }
2573}
2574
2575/*
2576 * Try and extract an RPC request from the mbuf data list received on a
2577 * stream socket. The "waitflag" argument indicates whether or not it
2578 * can sleep.
2579 */
2580static int
2581nfsrv_getstream(slp, waitflag)
2582 register struct nfssvc_sock *slp;
2583 int waitflag;
2584{
2585 register struct mbuf *m, **mpp;
2586 register char *cp1, *cp2;
2587 register int len;
b4c24cb9 2588 struct mbuf *om, *m2, *recm;
1c79356b
A
2589 u_long recmark;
2590
55e303ae 2591 if (slp->ns_nflag & SLPN_GETSTREAM)
1c79356b 2592 panic("nfs getstream");
55e303ae 2593 slp->ns_nflag |= SLPN_GETSTREAM;
1c79356b
A
2594 for (;;) {
2595 if (slp->ns_reclen == 0) {
2596 if (slp->ns_cc < NFSX_UNSIGNED) {
55e303ae 2597 slp->ns_nflag &= ~SLPN_GETSTREAM;
1c79356b
A
2598 return (0);
2599 }
2600 m = slp->ns_raw;
2601 if (m->m_len >= NFSX_UNSIGNED) {
2602 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
2603 m->m_data += NFSX_UNSIGNED;
2604 m->m_len -= NFSX_UNSIGNED;
2605 } else {
2606 cp1 = (caddr_t)&recmark;
2607 cp2 = mtod(m, caddr_t);
2608 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2609 while (m->m_len == 0) {
2610 m = m->m_next;
2611 cp2 = mtod(m, caddr_t);
2612 }
2613 *cp1++ = *cp2++;
2614 m->m_data++;
2615 m->m_len--;
2616 }
2617 }
2618 slp->ns_cc -= NFSX_UNSIGNED;
2619 recmark = ntohl(recmark);
2620 slp->ns_reclen = recmark & ~0x80000000;
2621 if (recmark & 0x80000000)
55e303ae 2622 slp->ns_nflag |= SLPN_LASTFRAG;
1c79356b 2623 else
55e303ae 2624 slp->ns_nflag &= ~SLPN_LASTFRAG;
1c79356b 2625 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
55e303ae 2626 slp->ns_nflag &= ~SLPN_GETSTREAM;
1c79356b
A
2627 return (EPERM);
2628 }
2629 }
2630
2631 /*
2632 * Now get the record part.
b4c24cb9
A
2633 *
2634 * Note that slp->ns_reclen may be 0. Linux sometimes
2635 * generates 0-length RPCs
1c79356b 2636 */
b4c24cb9 2637 recm = NULL;
1c79356b
A
2638 if (slp->ns_cc == slp->ns_reclen) {
2639 recm = slp->ns_raw;
2640 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2641 slp->ns_cc = slp->ns_reclen = 0;
2642 } else if (slp->ns_cc > slp->ns_reclen) {
2643 len = 0;
2644 m = slp->ns_raw;
2645 om = (struct mbuf *)0;
2646 while (len < slp->ns_reclen) {
2647 if ((len + m->m_len) > slp->ns_reclen) {
2648 m2 = m_copym(m, 0, slp->ns_reclen - len,
2649 waitflag);
2650 if (m2) {
2651 if (om) {
2652 om->m_next = m2;
2653 recm = slp->ns_raw;
2654 } else
2655 recm = m2;
2656 m->m_data += slp->ns_reclen - len;
2657 m->m_len -= slp->ns_reclen - len;
2658 len = slp->ns_reclen;
2659 } else {
55e303ae 2660 slp->ns_nflag &= ~SLPN_GETSTREAM;
1c79356b
A
2661 return (EWOULDBLOCK);
2662 }
2663 } else if ((len + m->m_len) == slp->ns_reclen) {
2664 om = m;
2665 len += m->m_len;
2666 m = m->m_next;
2667 recm = slp->ns_raw;
2668 om->m_next = (struct mbuf *)0;
2669 } else {
2670 om = m;
2671 len += m->m_len;
2672 m = m->m_next;
2673 }
2674 }
2675 slp->ns_raw = m;
2676 slp->ns_cc -= len;
2677 slp->ns_reclen = 0;
2678 } else {
55e303ae 2679 slp->ns_nflag &= ~SLPN_GETSTREAM;
1c79356b
A
2680 return (0);
2681 }
2682
2683 /*
2684 * Accumulate the fragments into a record.
2685 */
2686 mpp = &slp->ns_frag;
2687 while (*mpp)
2688 mpp = &((*mpp)->m_next);
2689 *mpp = recm;
55e303ae 2690 if (slp->ns_nflag & SLPN_LASTFRAG) {
1c79356b
A
2691 if (slp->ns_recend)
2692 slp->ns_recend->m_nextpkt = slp->ns_frag;
2693 else
2694 slp->ns_rec = slp->ns_frag;
2695 slp->ns_recend = slp->ns_frag;
2696 slp->ns_frag = (struct mbuf *)0;
2697 }
2698 }
2699}
2700
2701/*
2702 * Parse an RPC header.
2703 */
2704int
2705nfsrv_dorec(slp, nfsd, ndp)
2706 register struct nfssvc_sock *slp;
2707 struct nfsd *nfsd;
2708 struct nfsrv_descript **ndp;
2709{
2710 register struct mbuf *m;
2711 register struct mbuf *nam;
2712 register struct nfsrv_descript *nd;
2713 int error;
2714
2715 *ndp = NULL;
2716 if ((slp->ns_flag & SLP_VALID) == 0 ||
2717 (m = slp->ns_rec) == (struct mbuf *)0)
2718 return (ENOBUFS);
2719 slp->ns_rec = m->m_nextpkt;
2720 if (slp->ns_rec)
2721 m->m_nextpkt = (struct mbuf *)0;
2722 else
2723 slp->ns_recend = (struct mbuf *)0;
2724 if (m->m_type == MT_SONAME) {
2725 nam = m;
2726 m = m->m_next;
2727 nam->m_next = NULL;
2728 } else
2729 nam = NULL;
2730 MALLOC_ZONE(nd, struct nfsrv_descript *,
2731 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2732 nd->nd_md = nd->nd_mrep = m;
2733 nd->nd_nam2 = nam;
2734 nd->nd_dpos = mtod(m, caddr_t);
2735 error = nfs_getreq(nd, nfsd, TRUE);
2736 if (error) {
55e303ae
A
2737 if (nam)
2738 m_freem(nam);
2739 FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
1c79356b
A
2740 return (error);
2741 }
2742 *ndp = nd;
2743 nfsd->nfsd_nd = nd;
2744 return (0);
2745}
2746
2747/*
2748 * Parse an RPC request
2749 * - verify it
2750 * - fill in the cred struct.
2751 */
2752int
2753nfs_getreq(nd, nfsd, has_header)
2754 register struct nfsrv_descript *nd;
2755 struct nfsd *nfsd;
2756 int has_header;
2757{
2758 register int len, i;
2759 register u_long *tl;
2760 register long t1;
2761 struct uio uio;
2762 struct iovec iov;
2763 caddr_t dpos, cp2, cp;
2764 u_long nfsvers, auth_type;
2765 uid_t nickuid;
2766 int error = 0, nqnfs = 0, ticklen;
2767 struct mbuf *mrep, *md;
2768 register struct nfsuid *nuidp;
55e303ae 2769 struct timeval tvin, tvout, now;
1c79356b
A
2770#if 0 /* until encrypted keys are implemented */
2771 NFSKERBKEYSCHED_T keys; /* stores key schedule */
2772#endif
2773
2774 mrep = nd->nd_mrep;
2775 md = nd->nd_md;
2776 dpos = nd->nd_dpos;
2777 if (has_header) {
2778 nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
2779 nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
2780 if (*tl++ != rpc_call) {
2781 m_freem(mrep);
2782 return (EBADRPC);
2783 }
2784 } else
2785 nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
2786 nd->nd_repstat = 0;
2787 nd->nd_flag = 0;
2788 if (*tl++ != rpc_vers) {
2789 nd->nd_repstat = ERPCMISMATCH;
2790 nd->nd_procnum = NFSPROC_NOOP;
2791 return (0);
2792 }
2793 if (*tl != nfs_prog) {
2794 if (*tl == nqnfs_prog)
2795 nqnfs++;
2796 else {
2797 nd->nd_repstat = EPROGUNAVAIL;
2798 nd->nd_procnum = NFSPROC_NOOP;
2799 return (0);
2800 }
2801 }
2802 tl++;
2803 nfsvers = fxdr_unsigned(u_long, *tl++);
2804 if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
2805 (nfsvers != NQNFS_VER3 && nqnfs)) {
2806 nd->nd_repstat = EPROGMISMATCH;
2807 nd->nd_procnum = NFSPROC_NOOP;
2808 return (0);
2809 }
2810 if (nqnfs)
2811 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
2812 else if (nfsvers == NFS_VER3)
2813 nd->nd_flag = ND_NFSV3;
2814 nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
2815 if (nd->nd_procnum == NFSPROC_NULL)
2816 return (0);
2817 if (nd->nd_procnum >= NFS_NPROCS ||
2818 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2819 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2820 nd->nd_repstat = EPROCUNAVAIL;
2821 nd->nd_procnum = NFSPROC_NOOP;
2822 return (0);
2823 }
2824 if ((nd->nd_flag & ND_NFSV3) == 0)
2825 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2826 auth_type = *tl++;
2827 len = fxdr_unsigned(int, *tl++);
2828 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2829 m_freem(mrep);
2830 return (EBADRPC);
2831 }
2832
2833 nd->nd_flag &= ~ND_KERBAUTH;
2834 /*
2835 * Handle auth_unix or auth_kerb.
2836 */
2837 if (auth_type == rpc_auth_unix) {
2838 len = fxdr_unsigned(int, *++tl);
2839 if (len < 0 || len > NFS_MAXNAMLEN) {
2840 m_freem(mrep);
2841 return (EBADRPC);
2842 }
2843 nfsm_adv(nfsm_rndup(len));
2844 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2845 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2846 nd->nd_cr.cr_ref = 1;
2847 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2848 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2849 len = fxdr_unsigned(int, *tl);
2850 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
2851 m_freem(mrep);
2852 return (EBADRPC);
2853 }
2854 nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
2855 for (i = 1; i <= len; i++)
2856 if (i < NGROUPS)
2857 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2858 else
2859 tl++;
2860 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2861 if (nd->nd_cr.cr_ngroups > 1)
2862 nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
2863 len = fxdr_unsigned(int, *++tl);
2864 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2865 m_freem(mrep);
2866 return (EBADRPC);
2867 }
2868 if (len > 0)
2869 nfsm_adv(nfsm_rndup(len));
2870 } else if (auth_type == rpc_auth_kerb) {
2871 switch (fxdr_unsigned(int, *tl++)) {
2872 case RPCAKN_FULLNAME:
2873 ticklen = fxdr_unsigned(int, *tl);
2874 *((u_long *)nfsd->nfsd_authstr) = *tl;
2875 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2876 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2877 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
2878 m_freem(mrep);
2879 return (EBADRPC);
2880 }
2881 uio.uio_offset = 0;
2882 uio.uio_iov = &iov;
2883 uio.uio_iovcnt = 1;
2884 uio.uio_segflg = UIO_SYSSPACE;
2885 iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2886 iov.iov_len = RPCAUTH_MAXSIZ - 4;
2887 nfsm_mtouio(&uio, uio.uio_resid);
2888 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2889 if (*tl++ != rpc_auth_kerb ||
2890 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2891 printf("Bad kerb verifier\n");
2892 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2893 nd->nd_procnum = NFSPROC_NOOP;
2894 return (0);
2895 }
2896 nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2897 tl = (u_long *)cp;
2898 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2899 printf("Not fullname kerb verifier\n");
2900 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2901 nd->nd_procnum = NFSPROC_NOOP;
2902 return (0);
2903 }
2904 cp += NFSX_UNSIGNED;
2905 bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2906 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2907 nd->nd_flag |= ND_KERBFULL;
2908 nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2909 break;
2910 case RPCAKN_NICKNAME:
2911 if (len != 2 * NFSX_UNSIGNED) {
2912 printf("Kerb nickname short\n");
2913 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2914 nd->nd_procnum = NFSPROC_NOOP;
2915 return (0);
2916 }
2917 nickuid = fxdr_unsigned(uid_t, *tl);
2918 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2919 if (*tl++ != rpc_auth_kerb ||
2920 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2921 printf("Kerb nick verifier bad\n");
2922 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2923 nd->nd_procnum = NFSPROC_NOOP;
2924 return (0);
2925 }
2926 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2927 tvin.tv_sec = *tl++;
2928 tvin.tv_usec = *tl;
2929
2930 for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2931 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2932 if (nuidp->nu_cr.cr_uid == nickuid &&
2933 (!nd->nd_nam2 ||
2934 netaddr_match(NU_NETFAM(nuidp),
2935 &nuidp->nu_haddr, nd->nd_nam2)))
2936 break;
2937 }
2938 if (!nuidp) {
2939 nd->nd_repstat =
2940 (NFSERR_AUTHERR|AUTH_REJECTCRED);
2941 nd->nd_procnum = NFSPROC_NOOP;
2942 return (0);
2943 }
2944
2945 /*
2946 * Now, decrypt the timestamp using the session key
2947 * and validate it.
2948 */
2949#if NFSKERB
2950 XXX
2951#endif
2952
2953 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2954 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
55e303ae
A
2955 microtime(&now);
2956 if (nuidp->nu_expire < now.tv_sec ||
1c79356b
A
2957 nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2958 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2959 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2960 nuidp->nu_expire = 0;
2961 nd->nd_repstat =
2962 (NFSERR_AUTHERR|AUTH_REJECTVERF);
2963 nd->nd_procnum = NFSPROC_NOOP;
2964 return (0);
2965 }
2966 nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2967 nd->nd_flag |= ND_KERBNICK;
2968 };
2969 } else {
2970 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2971 nd->nd_procnum = NFSPROC_NOOP;
2972 return (0);
2973 }
2974
2975 /*
2976 * For nqnfs, get piggybacked lease request.
2977 */
2978 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
2979 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2980 nd->nd_flag |= fxdr_unsigned(int, *tl);
2981 if (nd->nd_flag & ND_LEASE) {
2982 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2983 nd->nd_duration = fxdr_unsigned(int, *tl);
2984 } else
2985 nd->nd_duration = NQ_MINLEASE;
2986 } else
2987 nd->nd_duration = NQ_MINLEASE;
2988 nd->nd_md = md;
2989 nd->nd_dpos = dpos;
2990 return (0);
2991nfsmout:
2992 return (error);
2993}
2994
2995/*
2996 * Search for a sleeping nfsd and wake it up.
2997 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2998 * running nfsds will go look for the work in the nfssvc_sock list.
2999 */
3000void
3001nfsrv_wakenfsd(slp)
3002 struct nfssvc_sock *slp;
3003{
3004 register struct nfsd *nd;
3005
3006 if ((slp->ns_flag & SLP_VALID) == 0)
3007 return;
55e303ae 3008 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
1c79356b
A
3009 if (nd->nfsd_flag & NFSD_WAITING) {
3010 nd->nfsd_flag &= ~NFSD_WAITING;
3011 if (nd->nfsd_slp)
3012 panic("nfsd wakeup");
3013 slp->ns_sref++;
3014 nd->nfsd_slp = slp;
3015 wakeup((caddr_t)nd);
3016 return;
3017 }
3018 }
3019 slp->ns_flag |= SLP_DOREC;
3020 nfsd_head_flag |= NFSD_CHECKSLP;
3021}
3022#endif /* NFS_NOSERVER */
3023
3024static int
55e303ae 3025nfs_msg(p, server, msg, error)
1c79356b 3026 struct proc *p;
55e303ae
A
3027 const char *server, *msg;
3028 int error;
1c79356b
A
3029{
3030 tpr_t tpr;
3031
3032 if (p)
3033 tpr = tprintf_open(p);
3034 else
3035 tpr = NULL;
55e303ae
A
3036 if (error)
3037 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
3038 error);
3039 else
3040 tprintf(tpr, "nfs server %s: %s\n", server, msg);
1c79356b
A
3041 tprintf_close(tpr);
3042 return (0);
3043}
55e303ae 3044
e5568f75
A
3045void
3046nfs_down(rep, nmp, proc, msg, error, flags)
55e303ae 3047 struct nfsreq *rep;
e5568f75
A
3048 struct nfsmount *nmp;
3049 struct proc *proc;
55e303ae 3050 const char *msg;
e5568f75 3051 int error, flags;
55e303ae 3052{
e5568f75 3053 if (nmp == NULL)
55e303ae 3054 return;
e5568f75
A
3055 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
3056 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
55e303ae 3057 VQ_NOTRESP, 0);
e5568f75 3058 nmp->nm_state |= NFSSTA_TIMEO;
55e303ae 3059 }
e5568f75
A
3060 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
3061 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3062 VQ_NOTRESPLOCK, 0);
3063 nmp->nm_state |= NFSSTA_LOCKTIMEO;
3064 }
3065 if (rep)
3066 rep->r_flags |= R_TPRINTFMSG;
3067 nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
55e303ae
A
3068}
3069
e5568f75
A
3070void
3071nfs_up(rep, nmp, proc, msg, flags)
55e303ae 3072 struct nfsreq *rep;
e5568f75
A
3073 struct nfsmount *nmp;
3074 struct proc *proc;
55e303ae 3075 const char *msg;
e5568f75 3076 int flags;
55e303ae 3077{
e5568f75 3078 if (nmp == NULL)
55e303ae 3079 return;
e5568f75
A
3080 if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0)
3081 nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
3082 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
3083 nmp->nm_state &= ~NFSSTA_TIMEO;
3084 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
55e303ae
A
3085 VQ_NOTRESP, 1);
3086 }
e5568f75
A
3087 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
3088 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
3089 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
3090 VQ_NOTRESPLOCK, 1);
3091 }
55e303ae 3092}
e5568f75 3093