]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_socket.c
xnu-517.3.7.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_socket.c
1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1989, 1991, 1993, 1995
28 * The Regents of the University of California. All rights reserved.
29 *
30 * This code is derived from software contributed to Berkeley by
31 * Rick Macklem at The University of Guelph.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
62 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
63 */
64
65 /*
66 * Socket operations for use by nfs
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/mount.h>
73 #include <sys/kernel.h>
74 #include <sys/mbuf.h>
75 #include <sys/malloc.h>
76 #include <sys/vnode.h>
77 #include <sys/domain.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/syslog.h>
82 #include <sys/tprintf.h>
83 #include <machine/spl.h>
84
85 #include <sys/time.h>
86 #include <kern/clock.h>
87 #include <sys/user.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93 #include <nfs/nfsproto.h>
94 #include <nfs/nfs.h>
95 #include <nfs/xdr_subs.h>
96 #include <nfs/nfsm_subs.h>
97 #include <nfs/nfsmount.h>
98 #include <nfs/nfsnode.h>
99 #include <nfs/nfsrtt.h>
100 #include <nfs/nqnfs.h>
101
102 #include <sys/kdebug.h>
103
/*
 * kdebug trace points for this file.  All three variants emit the same
 * DBG_FSRW trace code (A) with four integer arguments; they differ only
 * in the DBG_FUNC_* qualifier that is OR'ed into the code.
 */
#define FSDBG_EMIT(A, FUNC, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | FUNC, \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)

/* Trace point with no start/end qualifier. */
#define FSDBG(A, B, C, D, E) \
	FSDBG_EMIT(A, DBG_FUNC_NONE, B, C, D, E)
/* Trace point qualified with DBG_FUNC_START. */
#define FSDBG_TOP(A, B, C, D, E) \
	FSDBG_EMIT(A, DBG_FUNC_START, B, C, D, E)
/* Trace point qualified with DBG_FUNC_END. */
#define FSDBG_BOT(A, B, C, D, E) \
	FSDBG_EMIT(A, DBG_FUNC_END, B, C, D, E)

/* Boolean constants used throughout this file. */
#define TRUE	1
#define FALSE	0
116
/*
 * Estimate the retransmit timeout (rto) for an nfs rpc sent via an
 * unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that a timer estimate would probably be stale.
 * Also, since many of these rpcs are non-idempotent, a conservative
 * timeout is desired.
 *	getattr, lookup	- A+2D
 *	read, write	- A+4D
 *	other		- nm_timeo
 *
 * "n" is the mount (anything with nm_timeo, nm_srtt[] and nm_sdrtt[]),
 * "t" is the timer class: 0 selects nm_timeo, 1..4 select slot t-1 of
 * the per-mount estimator arrays.  Every use of "t" is parenthesized so
 * that an expression argument (e.g. "c ? 1 : 2") indexes the intended
 * slot.  Note that "t" is still evaluated more than once, so it must not
 * have side effects.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + \
	    (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + \
	    (n)->nm_sdrtt[(t) - 1] + 1)))

/*
 * Smoothed rtt / rtt deviation slot for the procedure of request "r".
 * Both expand to lvalues.  They index with proct[...] - 1, so they are
 * only meaningful for procedures whose proct[] entry is non-zero.
 */
#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
136 /*
137 * External data, mostly RPC constants in XDR form
138 */
139 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
140 rpc_msgaccepted, rpc_call, rpc_autherr,
141 rpc_auth_kerb;
142 extern u_long nfs_prog, nqnfs_prog;
143 extern time_t nqnfsstarttime;
144 extern struct nfsstats nfsstats;
145 extern int nfsv3_procid[NFS_NPROCS];
146 extern int nfs_ticks;
147 extern u_long nfs_xidwrap;
148
149 /*
150 * Defines which timer to use for the procnum.
151 * 0 - default
152 * 1 - getattr
153 * 2 - lookup
154 * 3 - read
155 * 4 - write
156 */
157 static int proct[NFS_NPROCS] = {
158 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
159 0, 0, 0,
160 };
161
162 /*
163 * There is a congestion window for outstanding rpcs maintained per mount
164 * point. The cwnd size is adjusted in roughly the way that:
165 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
166 * SIGCOMM '88". ACM, August 1988.
167 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
168 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
169 * of rpcs is in progress.
170 * (The sent count and cwnd are scaled for integer arith.)
171 * Variants of "slow start" were tried and were found to be too much of a
172 * performance hit (ave. rtt 3 times larger),
173 * I suspect due to the large rtt that nfs rpcs have.
174 */
175 #define NFS_CWNDSCALE 256
176 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
177 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
178 int nfsrtton = 0;
179 struct nfsrtt nfsrtt;
180
181 static int nfs_msg __P((struct proc *, const char *, const char *, int));
182 static void nfs_up(struct nfsreq *, const char *, int);
183 static void nfs_down(struct nfsreq *, const char *, int);
184 static int nfs_rcvlock __P((struct nfsreq *));
185 static void nfs_rcvunlock __P((struct nfsreq *));
186 static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
187 struct mbuf **mp));
188 static int nfs_reconnect __P((struct nfsreq *rep));
189 static void nfs_repbusy(struct nfsreq *rep);
190 static struct nfsreq * nfs_repnext(struct nfsreq *rep);
191 static void nfs_repdequeue(struct nfsreq *rep);
192 #ifndef NFS_NOSERVER
193 static int nfsrv_getstream __P((struct nfssvc_sock *,int));
194
195 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
196 struct nfssvc_sock *slp,
197 struct proc *procp,
198 struct mbuf **mreqp)) = {
199 nfsrv_null,
200 nfsrv_getattr,
201 nfsrv_setattr,
202 nfsrv_lookup,
203 nfsrv3_access,
204 nfsrv_readlink,
205 nfsrv_read,
206 nfsrv_write,
207 nfsrv_create,
208 nfsrv_mkdir,
209 nfsrv_symlink,
210 nfsrv_mknod,
211 nfsrv_remove,
212 nfsrv_rmdir,
213 nfsrv_rename,
214 nfsrv_link,
215 nfsrv_readdir,
216 nfsrv_readdirplus,
217 nfsrv_statfs,
218 nfsrv_fsinfo,
219 nfsrv_pathconf,
220 nfsrv_commit,
221 nqnfsrv_getlease,
222 nqnfsrv_vacated,
223 nfsrv_noop,
224 nfsrv_noop
225 };
226 #endif /* NFS_NOSERVER */
227
228 /*
229 * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
230 * But some of this code may prove useful someday...
231 */
232 #undef NFSDIAG
233 #if NFSDIAG
234 int nfstraceindx = 0;
235 struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
236
237 #define NFSTRACESUSPENDERS
238 #ifdef NFSTRACESUSPENDERS
239 uint nfstracemask = 0xfff00200;
240 int nfstracexid = -1;
241 uint onfstracemask = 0;
242 int nfstracesuspend = -1;
243 #define NFSTRACE_SUSPEND \
244 { \
245 if (nfstracemask) { \
246 onfstracemask = nfstracemask; \
247 nfstracemask = 0; \
248 } \
249 }
250 #define NFSTRACE_RESUME \
251 { \
252 nfstracesuspend = -1; \
253 if (!nfstracemask) \
254 nfstracemask = onfstracemask; \
255 }
256 #define NFSTRACE_STARTSUSPENDCOUNTDOWN \
257 { \
258 nfstracesuspend = (nfstraceindx+100) % NFSTBUFSIZ; \
259 }
260 #define NFSTRACE_SUSPENDING (nfstracesuspend != -1)
261 #define NFSTRACE_SUSPENSEOVER \
262 (nfstracesuspend > 100 ? \
263 (nfstraceindx >= nfstracesuspend || \
264 nfstraceindx < nfstracesuspend - 100) : \
265 (nfstraceindx >= nfstracesuspend && \
266 nfstraceindx < nfstracesuspend + 8192 - 100))
267 #else
268 uint nfstracemask = 0;
269 #endif /* NFSTRACESUSPENDERS */
270
271 int nfsprnttimo = 1;
272
273 int nfsodata[1024];
274 int nfsoprocnum, nfsolen;
275 int nfsbt[32], nfsbtlen;
276
277 #if defined(__ppc__)
278 int
279 backtrace(int *where, int size)
280 {
281 int register sp, *fp, numsaved;
282
283 __asm__ volatile("mr %0,r1" : "=r" (sp));
284
285 fp = (int *)*((int *)sp);
286 size /= sizeof(int);
287 for (numsaved = 0; numsaved < size; numsaved++) {
288 *where++ = fp[2];
289 if ((int)fp <= 0)
290 break;
291 fp = (int *)*fp;
292 }
293 return (numsaved);
294 }
295 #elif defined(__i386__)
296 int
297 backtrace()
298 {
299 return (0); /* Till someone implements a real routine */
300 }
301 #else
302 #error architecture not implemented.
303 #endif
304
305 void
306 nfsdup(struct nfsreq *rep)
307 {
308 int *ip, i, first = 1, end;
309 char *s, b[240];
310 struct mbuf *mb;
311
312 if ((nfs_debug & NFS_DEBUG_DUP) == 0)
313 return;
314 /* last mbuf in chain will be nfs content */
315 for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next)
316 ;
317 if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen &&
318 !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) {
319 s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid,
320 rep->r_procnum);
321 end = (int)(VTONFS(rep->r_vp)->n_fhp);
322 ip = (int *)(end & ~3);
323 end += VTONFS(rep->r_vp)->n_fhsize;
324 while ((int)ip < end) {
325 i = *ip++;
326 if (first) { /* avoid leading zeroes */
327 if (i == 0)
328 continue;
329 first = 0;
330 s += sprintf(s, "%x", i);
331 } else
332 s += sprintf(s, "%08x", i);
333 }
334 if (first)
335 sprintf(s, "%x", 0);
336 else /* eliminate trailing zeroes */
337 while (*--s == '0')
338 *s = 0;
339 /*
340 * set a breakpoint here and you can view the
341 * current backtrace and the one saved in nfsbt
342 */
343 kprintf("%s\n", b);
344 }
345 nfsoprocnum = rep->r_procnum;
346 nfsolen = mb->m_len;
347 bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len);
348 nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt));
349 }
350 #endif /* NFSDIAG */
351
352 /*
353 * Initialize sockets and congestion for a new NFS connection.
354 * We do not free the sockaddr if error.
355 */
356 int
357 nfs_connect(nmp, rep)
358 register struct nfsmount *nmp;
359 struct nfsreq *rep;
360 {
361 register struct socket *so;
362 int s, error, rcvreserve, sndreserve;
363 struct sockaddr *saddr;
364 struct sockaddr_in sin;
365 u_short tport;
366
367 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
368 nmp->nm_so = (struct socket *)0;
369 saddr = mtod(nmp->nm_nam, struct sockaddr *);
370 error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
371 nmp->nm_soproto);
372 if (error) {
373 goto bad;
374 }
375 so = nmp->nm_so;
376 nmp->nm_soflags = so->so_proto->pr_flags;
377
378 /*
379 * Some servers require that the client port be a reserved port number.
380 */
381 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
382 sin.sin_len = sizeof (struct sockaddr_in);
383 sin.sin_family = AF_INET;
384 sin.sin_addr.s_addr = INADDR_ANY;
385 tport = IPPORT_RESERVED - 1;
386 sin.sin_port = htons(tport);
387
388 while ((error = sobind(so, (struct sockaddr *) &sin) == EADDRINUSE) &&
389 (--tport > IPPORT_RESERVED / 2))
390 sin.sin_port = htons(tport);
391 if (error) {
392 goto bad;
393 }
394 }
395
396 /*
397 * Protocols that do not require connections may be optionally left
398 * unconnected for servers that reply from a port other than NFS_PORT.
399 */
400 if (nmp->nm_flag & NFSMNT_NOCONN) {
401 if (nmp->nm_soflags & PR_CONNREQUIRED) {
402 error = ENOTCONN;
403 goto bad;
404 }
405 } else {
406 error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *));
407 if (error) {
408 goto bad;
409 }
410
411 /*
412 * Wait for the connection to complete. Cribbed from the
413 * connect system call but with the wait timing out so
414 * that interruptible mounts don't hang here for a long time.
415 */
416 s = splnet();
417 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
418 (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
419 "nfscon", 2 * hz);
420 if ((so->so_state & SS_ISCONNECTING) &&
421 so->so_error == 0 && rep &&
422 (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
423 so->so_state &= ~SS_ISCONNECTING;
424 splx(s);
425 goto bad;
426 }
427 }
428 if (so->so_error) {
429 error = so->so_error;
430 so->so_error = 0;
431 splx(s);
432 goto bad;
433 }
434 splx(s);
435 }
436 /*
437 * Always time out on recieve, this allows us to reconnect the
438 * socket to deal with network changes.
439 */
440 so->so_rcv.sb_timeo = (2 * hz);
441 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
442 so->so_snd.sb_timeo = (5 * hz);
443 } else {
444 so->so_snd.sb_timeo = 0;
445 }
446 if (nmp->nm_sotype == SOCK_DGRAM) {
447 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
448 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
449 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
450 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
451 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
452 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
453 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
454 } else {
455 if (nmp->nm_sotype != SOCK_STREAM)
456 panic("nfscon sotype");
457
458 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
459 struct sockopt sopt;
460 int val;
461
462 bzero(&sopt, sizeof sopt);
463 sopt.sopt_dir = SOPT_SET;
464 sopt.sopt_level = SOL_SOCKET;
465 sopt.sopt_name = SO_KEEPALIVE;
466 sopt.sopt_val = &val;
467 sopt.sopt_valsize = sizeof val;
468 val = 1;
469 sosetopt(so, &sopt);
470 }
471 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
472 struct sockopt sopt;
473 int val;
474
475 bzero(&sopt, sizeof sopt);
476 sopt.sopt_dir = SOPT_SET;
477 sopt.sopt_level = IPPROTO_TCP;
478 sopt.sopt_name = TCP_NODELAY;
479 sopt.sopt_val = &val;
480 sopt.sopt_valsize = sizeof val;
481 val = 1;
482 sosetopt(so, &sopt);
483 }
484
485 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
486 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
487 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
488 }
489
490 if (sndreserve > NFS_MAXSOCKBUF)
491 sndreserve = NFS_MAXSOCKBUF;
492 if (rcvreserve > NFS_MAXSOCKBUF)
493 rcvreserve = NFS_MAXSOCKBUF;
494 error = soreserve(so, sndreserve, rcvreserve);
495 if (error) {
496 goto bad;
497 }
498 so->so_rcv.sb_flags |= SB_NOINTR;
499 so->so_snd.sb_flags |= SB_NOINTR;
500
501 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
502
503 /* Initialize other non-zero congestion variables */
504 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
505 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
506 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
507 nmp->nm_sdrtt[3] = 0;
508 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
509 nmp->nm_sent = 0;
510 FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
511 nmp->nm_timeouts = 0;
512 return (0);
513
514 bad:
515 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
516 nfs_disconnect(nmp);
517 return (error);
518 }
519
520 /*
521 * Reconnect routine:
522 * Called when a connection is broken on a reliable protocol.
523 * - clean up the old socket
524 * - nfs_connect() again
525 * - set R_MUSTRESEND for all outstanding requests on mount point
526 * If this fails the mount point is DEAD!
527 * nb: Must be called with the nfs_sndlock() set on the mount point.
528 */
529 static int
530 nfs_reconnect(rep)
531 register struct nfsreq *rep;
532 {
533 register struct nfsreq *rp;
534 register struct nfsmount *nmp = rep->r_nmp;
535 int error;
536
537 nfs_disconnect(nmp);
538 while ((error = nfs_connect(nmp, rep))) {
539 if (error == EINTR || error == ERESTART)
540 return (EINTR);
541 if (error == EIO)
542 return (EIO);
543 nfs_down(rep, "can not connect", error);
544 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
545 }
546
547 NFS_DPF(DUP, ("nfs_reconnect RESEND\n"));
548 /*
549 * Loop through outstanding request list and fix up all requests
550 * on old socket.
551 */
552 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
553 if (rp->r_nmp == nmp)
554 rp->r_flags |= R_MUSTRESEND;
555 }
556 return (0);
557 }
558
559 /*
560 * NFS disconnect. Clean up and unlink.
561 */
562 void
563 nfs_disconnect(nmp)
564 register struct nfsmount *nmp;
565 {
566 register struct socket *so;
567
568 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
569 if (nmp->nm_so) {
570 so = nmp->nm_so;
571 nmp->nm_so = (struct socket *)0;
572 soshutdown(so, 2);
573 soclose(so);
574 }
575 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
576 }
577
578 /*
579 * This is the nfs send routine. For connection based socket types, it
580 * must be called with an nfs_sndlock() on the socket.
581 * "rep == NULL" indicates that it has been called from a server.
582 * For the client side:
583 * - return EINTR if the RPC is terminated, 0 otherwise
584 * - set R_MUSTRESEND if the send fails for any reason
585 * - do any cleanup required by recoverable socket errors (???)
586 * For the server side:
587 * - return EINTR or ERESTART if interrupted by a signal
588 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
589 * - do any cleanup required by recoverable socket errors (???)
590 */
591 int
592 nfs_send(so, nam, top, rep)
593 register struct socket *so;
594 struct mbuf *nam;
595 register struct mbuf *top;
596 struct nfsreq *rep;
597 {
598 struct sockaddr *sendnam;
599 int error, error2, soflags, flags;
600 int xidqueued = 0;
601 struct nfsreq *rp;
602 char savenametolog[MNAMELEN];
603
604 if (rep) {
605 error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
606 if (error) {
607 m_freem(top);
608 return (error);
609 }
610 if ((so = rep->r_nmp->nm_so) == NULL) {
611 rep->r_flags |= R_MUSTRESEND;
612 m_freem(top);
613 return (0);
614 }
615 rep->r_flags &= ~R_MUSTRESEND;
616 soflags = rep->r_nmp->nm_soflags;
617 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
618 if (rp == rep)
619 break;
620 if (rp)
621 xidqueued = rp->r_xid;
622 } else
623 soflags = so->so_proto->pr_flags;
624 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) ||
625 (nam == 0))
626 sendnam = (struct sockaddr *)0;
627 else
628 sendnam = mtod(nam, struct sockaddr *);
629
630 if (so->so_type == SOCK_SEQPACKET)
631 flags = MSG_EOR;
632 else
633 flags = 0;
634
635 #if NFSDIAG
636 if (rep)
637 nfsdup(rep);
638 #endif
639 /*
640 * Save the name here in case mount point goes away when we switch
641 * funnels. The name is using local stack and is large, but don't
642 * want to block if we malloc.
643 */
644 if (rep)
645 strncpy(savenametolog,
646 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
647 MNAMELEN);
648 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
649 error = sosend(so, sendnam, (struct uio *)0, top,
650 (struct mbuf *)0, flags);
651 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
652
653 if (error) {
654 if (rep) {
655 if (xidqueued) {
656 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
657 if (rp == rep && rp->r_xid == xidqueued)
658 break;
659 if (!rp)
660 panic("nfs_send: error %d xid %x gone",
661 error, xidqueued);
662 }
663 log(LOG_INFO, "nfs send error %d for server %s\n",
664 error, savenametolog);
665 /*
666 * Deal with errors for the client side.
667 */
668 error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
669 if (error2) {
670 error = error2;
671 } else {
672 rep->r_flags |= R_MUSTRESEND;
673 NFS_DPF(DUP,
674 ("nfs_send RESEND error=%d\n", error));
675 }
676 } else
677 log(LOG_INFO, "nfsd send error %d\n", error);
678
679 /*
680 * Handle any recoverable (soft) socket errors here. (???)
681 */
682 if (error != EINTR && error != ERESTART && error != EIO &&
683 error != EWOULDBLOCK && error != EPIPE) {
684 error = 0;
685 }
686 }
687 return (error);
688 }
689
690 /*
691 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
692 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
693 * Mark and consolidate the data into a new mbuf list.
694 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
695 * small mbufs.
696 * For SOCK_STREAM we must be very careful to read an entire record once
697 * we have read any of it, even if the system call has been interrupted.
698 */
699 static int
700 nfs_receive(rep, aname, mp)
701 register struct nfsreq *rep;
702 struct mbuf **aname;
703 struct mbuf **mp;
704 {
705 register struct socket *so;
706 struct uio auio;
707 struct iovec aio;
708 register struct mbuf *m;
709 struct mbuf *control;
710 u_long len;
711 struct sockaddr **getnam;
712 struct sockaddr *tmp_nam;
713 struct mbuf *mhck;
714 struct sockaddr_in *sin;
715 int error, error2, sotype, rcvflg;
716 struct proc *p = current_proc(); /* XXX */
717
718 /*
719 * Set up arguments for soreceive()
720 */
721 *mp = (struct mbuf *)0;
722 *aname = (struct mbuf *)0;
723 sotype = rep->r_nmp->nm_sotype;
724
725 /*
726 * For reliable protocols, lock against other senders/receivers
727 * in case a reconnect is necessary.
728 * For SOCK_STREAM, first get the Record Mark to find out how much
729 * more there is to get.
730 * We must lock the socket against other receivers
731 * until we have an entire rpc request/reply.
732 */
733 if (sotype != SOCK_DGRAM) {
734 error = nfs_sndlock(rep);
735 if (error)
736 return (error);
737 tryagain:
738 /*
739 * Check for fatal errors and resending request.
740 */
741 /*
742 * Ugh: If a reconnect attempt just happened, nm_so
743 * would have changed. NULL indicates a failed
744 * attempt that has essentially shut down this
745 * mount point.
746 */
747 if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
748 nfs_sndunlock(rep);
749 if (error)
750 return (error);
751 return (EINTR);
752 }
753 so = rep->r_nmp->nm_so;
754 if (!so) {
755 error = nfs_reconnect(rep);
756 if (error) {
757 nfs_sndunlock(rep);
758 return (error);
759 }
760 goto tryagain;
761 }
762 while (rep->r_flags & R_MUSTRESEND) {
763 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
764 nfsstats.rpcretries++;
765 NFS_DPF(DUP,
766 ("nfs_receive RESEND %s\n",
767 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname));
768 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
769 /*
770 * we also hold rcv lock so rep is still
771 * legit this point
772 */
773 if (error) {
774 if (error == EINTR || error == ERESTART ||
775 (error = nfs_reconnect(rep))) {
776 nfs_sndunlock(rep);
777 return (error);
778 }
779 goto tryagain;
780 }
781 }
782 nfs_sndunlock(rep);
783 if (sotype == SOCK_STREAM) {
784 aio.iov_base = (caddr_t) &len;
785 aio.iov_len = sizeof(u_long);
786 auio.uio_iov = &aio;
787 auio.uio_iovcnt = 1;
788 auio.uio_segflg = UIO_SYSSPACE;
789 auio.uio_rw = UIO_READ;
790 auio.uio_offset = 0;
791 auio.uio_resid = sizeof(u_long);
792 auio.uio_procp = p;
793 do {
794 rcvflg = MSG_WAITALL;
795 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
796 error = soreceive(so, (struct sockaddr **)0, &auio,
797 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
798 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
799 if (!rep->r_nmp) /* if unmounted then bailout */
800 goto shutout;
801 if (error == EWOULDBLOCK && rep) {
802 error2 = nfs_sigintr(rep->r_nmp, rep, p);
803 if (error2)
804 error = error2;
805 }
806 } while (error == EWOULDBLOCK);
807 if (!error && auio.uio_resid > 0) {
808 log(LOG_INFO,
809 "short receive (%d/%d) from nfs server %s\n",
810 sizeof(u_long) - auio.uio_resid,
811 sizeof(u_long),
812 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
813 error = EPIPE;
814 }
815 if (error)
816 goto errout;
817 len = ntohl(len) & ~0x80000000;
818 /*
819 * This is SERIOUS! We are out of sync with the sender
820 * and forcing a disconnect/reconnect is all I can do.
821 */
822 if (len > NFS_MAXPACKET) {
823 log(LOG_ERR, "%s (%d) from nfs server %s\n",
824 "impossible packet length",
825 len,
826 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
827 error = EFBIG;
828 goto errout;
829 }
830 auio.uio_resid = len;
831
832 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
833 do {
834 rcvflg = MSG_WAITALL;
835 error = soreceive(so, (struct sockaddr **)0,
836 &auio, mp, (struct mbuf **)0, &rcvflg);
837 if (!rep->r_nmp) /* if unmounted then bailout */ {
838 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
839 goto shutout;
840 }
841 } while (error == EWOULDBLOCK || error == EINTR ||
842 error == ERESTART);
843
844 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
845
846 if (!error && auio.uio_resid > 0) {
847 log(LOG_INFO,
848 "short receive (%d/%d) from nfs server %s\n",
849 len - auio.uio_resid, len,
850 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
851 error = EPIPE;
852 }
853 } else {
854 /*
855 * NB: Since uio_resid is big, MSG_WAITALL is ignored
856 * and soreceive() will return when it has either a
857 * control msg or a data msg.
858 * We have no use for control msg., but must grab them
859 * and then throw them away so we know what is going
860 * on.
861 */
862 auio.uio_resid = len = 100000000; /* Anything Big */
863 auio.uio_procp = p;
864
865 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
866 do {
867 rcvflg = 0;
868 error = soreceive(so, (struct sockaddr **)0,
869 &auio, mp, &control, &rcvflg);
870 if (control)
871 m_freem(control);
872 if (!rep->r_nmp) /* if unmounted then bailout */ {
873 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
874 goto shutout;
875 }
876 if (error == EWOULDBLOCK && rep) {
877 error2 = nfs_sigintr(rep->r_nmp, rep, p);
878 if (error2) {
879 thread_funnel_switch(NETWORK_FUNNEL,
880 KERNEL_FUNNEL);
881 return (error2);
882 }
883 }
884 } while (error == EWOULDBLOCK ||
885 (!error && *mp == NULL && control));
886
887 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
888
889 if ((rcvflg & MSG_EOR) == 0)
890 printf("Egad!!\n");
891 if (!error && *mp == NULL)
892 error = EPIPE;
893 len -= auio.uio_resid;
894 }
895 errout:
896 if (error && error != EINTR && error != ERESTART) {
897 m_freem(*mp);
898 *mp = (struct mbuf *)0;
899 if (error != EPIPE)
900 log(LOG_INFO,
901 "receive error %d from nfs server %s\n",
902 error,
903 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
904 error = nfs_sndlock(rep);
905 if (!error)
906 error = nfs_reconnect(rep);
907 if (!error)
908 goto tryagain;
909 }
910 } else {
911 /*
912 * We could have failed while rebinding the datagram socket
913 * so we need to attempt to rebind here.
914 */
915 if ((so = rep->r_nmp->nm_so) == NULL) {
916 error = nfs_sndlock(rep);
917 if (!error) {
918 error = nfs_reconnect(rep);
919 nfs_sndunlock(rep);
920 }
921 if (error)
922 return (error);
923 if (!rep->r_nmp) /* if unmounted then bailout */
924 return (ENXIO);
925 so = rep->r_nmp->nm_so;
926 }
927 if (so->so_state & SS_ISCONNECTED)
928 getnam = (struct sockaddr **)0;
929 else
930 getnam = &tmp_nam;;
931 auio.uio_resid = len = 1000000;
932 auio.uio_procp = p;
933
934 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
935 do {
936 rcvflg = 0;
937 error = soreceive(so, getnam, &auio, mp,
938 (struct mbuf **)0, &rcvflg);
939
940 if ((getnam) && (*getnam)) {
941 MGET(mhck, M_WAIT, MT_SONAME);
942 mhck->m_len = (*getnam)->sa_len;
943 sin = mtod(mhck, struct sockaddr_in *);
944 bcopy(*getnam, sin, sizeof(struct sockaddr_in));
945 mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
946 FREE(*getnam, M_SONAME);
947 *aname = mhck;
948 }
949 if (!rep->r_nmp) /* if unmounted then bailout */
950 goto dgramout;
951 if (error) {
952 error2 = nfs_sigintr(rep->r_nmp, rep, p);
953 if (error2) {
954 error = error2;
955 goto dgramout;
956 }
957 }
958 /* Reconnect for all errors. We may be receiving
959 * soft/hard/blocking errors because of a network
960 * change.
961 * XXX: we should rate limit or delay this
962 * to once every N attempts or something.
963 * although TCP doesn't seem to.
964 */
965 if (error) {
966 thread_funnel_switch(NETWORK_FUNNEL,
967 KERNEL_FUNNEL);
968 error2 = nfs_sndlock(rep);
969 if (!error2) {
970 error2 = nfs_reconnect(rep);
971 if (error2)
972 error = error2;
973 else if (!rep->r_nmp) /* if unmounted then bailout */
974 error = ENXIO;
975 else
976 so = rep->r_nmp->nm_so;
977 nfs_sndunlock(rep);
978 } else {
979 error = error2;
980 }
981 thread_funnel_switch(KERNEL_FUNNEL,
982 NETWORK_FUNNEL);
983 }
984 } while (error == EWOULDBLOCK);
985
986 dgramout:
987 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
988 len -= auio.uio_resid;
989 }
990 shutout:
991 if (error) {
992 m_freem(*mp);
993 *mp = (struct mbuf *)0;
994 }
995 return (error);
996 }
997
998 /*
999 * Implement receipt of reply on a socket.
1000 * We must search through the list of received datagrams matching them
1001 * with outstanding requests using the xid, until ours is found.
1002 */
1003 /* ARGSUSED */
1004 int
1005 nfs_reply(myrep)
1006 struct nfsreq *myrep;
1007 {
1008 register struct nfsreq *rep;
1009 register struct nfsmount *nmp = myrep->r_nmp;
1010 register long t1;
1011 struct mbuf *mrep, *md;
1012 struct mbuf *nam;
1013 u_long rxid, *tl;
1014 caddr_t dpos, cp2;
1015 int error;
1016
1017 /*
1018 * Loop around until we get our own reply
1019 */
1020 for (;;) {
1021 /*
1022 * Lock against other receivers so that I don't get stuck in
1023 * sbwait() after someone else has received my reply for me.
1024 * Also necessary for connection based protocols to avoid
1025 * race conditions during a reconnect.
1026 * If nfs_rcvlock() returns EALREADY, that means that
1027 * the reply has already been received by another
1028 * process and we can return immediately. In this
1029 * case, the lock is not taken to avoid races with
1030 * other processes.
1031 */
1032 error = nfs_rcvlock(myrep);
1033 if (error == EALREADY)
1034 return (0);
1035 if (error)
1036 return (error);
1037
1038 /*
1039 * If we slept after putting bits otw, then reply may have
1040 * arrived. In which case returning is required, or we
1041 * would hang trying to nfs_receive an already received reply.
1042 */
1043 if (myrep->r_mrep != NULL) {
1044 nfs_rcvunlock(myrep);
1045 FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
1046 return (0);
1047 }
1048 /*
1049 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
1050 * is still intact by checks done in nfs_rcvlock.
1051 */
1052 error = nfs_receive(myrep, &nam, &mrep);
1053 if (nam)
1054 m_freem(nam);
1055 /*
1056 * Bailout asap if nfsmount struct gone (unmounted).
1057 */
1058 if (!myrep->r_nmp || !nmp->nm_so) {
1059 FSDBG(530, myrep->r_xid, myrep, nmp, -2);
1060 return (ENXIO);
1061 }
1062 if (error) {
1063 FSDBG(530, myrep->r_xid, myrep, nmp, error);
1064 nfs_rcvunlock(myrep);
1065
1066 /* Bailout asap if nfsmount struct gone (unmounted). */
1067 if (!myrep->r_nmp || !nmp->nm_so)
1068 return (ENXIO);
1069
1070 /*
1071 * Ignore routing errors on connectionless protocols??
1072 */
1073 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
1074 nmp->nm_so->so_error = 0;
1075 if (myrep->r_flags & R_GETONEREP)
1076 return (0);
1077 continue;
1078 }
1079 return (error);
1080 }
1081
1082 /*
1083 * We assume all is fine, but if we did not have an error
1084 * and mrep is 0, better not dereference it. nfs_receieve
1085 * calls soreceive which carefully sets error=0 when it got
1086 * errors on sbwait (tsleep). In most cases, I assume that's
1087 * so we could go back again. In tcp case, EPIPE is returned.
1088 * In udp, case nfs_receive gets back here with no error and no
1089 * mrep. Is the right fix to have soreceive check for process
1090 * aborted after sbwait and return something non-zero? Should
1091 * nfs_receive give an EPIPE? Too risky to play with those
1092 * two this late in game for a shutdown problem. Instead,
1093 * just check here and get out. (ekn)
1094 */
1095 if (!mrep) {
1096 FSDBG(530, myrep->r_xid, myrep, nmp, -3);
1097 return (ENXIO); /* sounds good */
1098 }
1099
1100 /*
1101 * Get the xid and check that it is an rpc reply
1102 */
1103 md = mrep;
1104 dpos = mtod(md, caddr_t);
1105 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
1106 rxid = *tl++;
1107 if (*tl != rpc_reply) {
1108 #ifndef NFS_NOSERVER
1109 if (nmp->nm_flag & NFSMNT_NQNFS) {
1110 if (nqnfs_callback(nmp, mrep, md, dpos))
1111 nfsstats.rpcinvalid++;
1112 } else {
1113 nfsstats.rpcinvalid++;
1114 m_freem(mrep);
1115 }
1116 #else
1117 nfsstats.rpcinvalid++;
1118 m_freem(mrep);
1119 #endif
1120 nfsmout:
1121 if (nmp->nm_state & NFSSTA_RCVLOCK)
1122 nfs_rcvunlock(myrep);
1123 if (myrep->r_flags & R_GETONEREP)
1124 return (0); /* this path used by NQNFS */
1125 continue;
1126 }
1127
1128 /*
1129 * Loop through the request list to match up the reply
1130 * Iff no match, just drop the datagram
1131 */
1132 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1133 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1134 /* Found it.. */
1135 rep->r_mrep = mrep;
1136 rep->r_md = md;
1137 rep->r_dpos = dpos;
1138 /*
1139 * If we're tracking the round trip time
1140 * then we update the circular log here
1141 * with the stats from our current request.
1142 */
1143 if (nfsrtton) {
1144 struct rttl *rt;
1145
1146 rt = &nfsrtt.rttl[nfsrtt.pos];
1147 rt->proc = rep->r_procnum;
1148 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
1149 rt->sent = nmp->nm_sent;
1150 rt->cwnd = nmp->nm_cwnd;
1151 if (proct[rep->r_procnum] == 0)
1152 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1153 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
1154 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
1155 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
1156 microtime(&rt->tstamp); // XXX unused
1157 if (rep->r_flags & R_TIMING)
1158 rt->rtt = rep->r_rtt;
1159 else
1160 rt->rtt = 1000000;
1161 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
1162 }
1163 /*
1164 * Update congestion window.
1165 * Do the additive increase of
1166 * one rpc/rtt.
1167 */
1168 FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
1169 nmp->nm_cwnd);
1170 if (nmp->nm_cwnd <= nmp->nm_sent) {
1171 nmp->nm_cwnd +=
1172 (NFS_CWNDSCALE * NFS_CWNDSCALE +
1173 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
1174 if (nmp->nm_cwnd > NFS_MAXCWND)
1175 nmp->nm_cwnd = NFS_MAXCWND;
1176 }
1177 if (rep->r_flags & R_SENT) {
1178 rep->r_flags &= ~R_SENT;
1179 nmp->nm_sent -= NFS_CWNDSCALE;
1180 }
1181 /*
1182 * Update rtt using a gain of 0.125 on the mean
1183 * and a gain of 0.25 on the deviation.
1184 */
1185 if (rep->r_flags & R_TIMING) {
1186 /*
1187 * Since the timer resolution of
1188 * NFS_HZ is so course, it can often
1189 * result in r_rtt == 0. Since
1190 * r_rtt == N means that the actual
1191 * rtt is between N+dt and N+2-dt ticks,
1192 * add 1.
1193 */
1194 if (proct[rep->r_procnum] == 0)
1195 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1196 t1 = rep->r_rtt + 1;
1197 t1 -= (NFS_SRTT(rep) >> 3);
1198 NFS_SRTT(rep) += t1;
1199 if (t1 < 0)
1200 t1 = -t1;
1201 t1 -= (NFS_SDRTT(rep) >> 2);
1202 NFS_SDRTT(rep) += t1;
1203 }
1204 nmp->nm_timeouts = 0;
1205 break;
1206 }
1207 }
1208 nfs_rcvunlock(myrep);
1209 /*
1210 * If not matched to a request, drop it.
1211 * If it's mine, get out.
1212 */
1213 if (rep == 0) {
1214 nfsstats.rpcunexpected++;
1215 m_freem(mrep);
1216 } else if (rep == myrep) {
1217 if (rep->r_mrep == NULL)
1218 panic("nfs_reply: nil r_mrep");
1219 return (0);
1220 }
1221 FSDBG(530, myrep->r_xid, myrep, rep,
1222 rep ? rep->r_xid : myrep->r_flags);
1223 if (myrep->r_flags & R_GETONEREP)
1224 return (0); /* this path used by NQNFS */
1225 }
1226 }
1227
1228 /*
1229 * nfs_request - goes something like this
1230 * - fill in request struct
1231 * - links it into list
1232 * - calls nfs_send() for first transmit
1233 * - calls nfs_receive() to get reply
1234 * - break down rpc header and return with nfs reply pointed to
1235 * by mrep or error
1236 * nb: always frees up mreq mbuf list
1237 */
1238 int
1239 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
1240 struct vnode *vp;
1241 struct mbuf *mrest;
1242 int procnum;
1243 struct proc *procp;
1244 struct ucred *cred;
1245 struct mbuf **mrp;
1246 struct mbuf **mdp;
1247 caddr_t *dposp;
1248 u_int64_t *xidp;
1249 {
1250 register struct mbuf *m, *mrep, *m2;
1251 register struct nfsreq *rep, *rp;
1252 register u_long *tl;
1253 register int i;
1254 struct nfsmount *nmp;
1255 struct mbuf *md, *mheadend;
1256 struct nfsnode *np;
1257 char nickv[RPCX_NICKVERF];
1258 time_t reqtime, waituntil;
1259 caddr_t dpos, cp2;
1260 int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
1261 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
1262 int verf_len, verf_type;
1263 u_long xid;
1264 u_quad_t frev;
1265 char *auth_str, *verf_str;
1266 NFSKERBKEY_T key; /* save session key */
1267 int nmsotype;
1268 struct timeval now;
1269
1270 if (xidp)
1271 *xidp = 0;
1272
1273 MALLOC_ZONE(rep, struct nfsreq *,
1274 sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1275
1276 nmp = VFSTONFS(vp->v_mount);
1277 if (nmp == NULL ||
1278 (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
1279 (NFSSTA_FORCE|NFSSTA_TIMEO)) {
1280 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1281 return (ENXIO);
1282 }
1283 nmsotype = nmp->nm_sotype;
1284
1285 FSDBG_TOP(531, vp, procnum, nmp, rep);
1286
1287 rep->r_nmp = nmp;
1288 rep->r_vp = vp;
1289 rep->r_procp = procp;
1290 rep->r_procnum = procnum;
1291 microuptime(&now);
1292 rep->r_lastmsg = now.tv_sec -
1293 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
1294 i = 0;
1295 m = mrest;
1296 while (m) {
1297 i += m->m_len;
1298 m = m->m_next;
1299 }
1300 mrest_len = i;
1301
1302 /*
1303 * Get the RPC header with authorization.
1304 */
1305 kerbauth:
1306 nmp = VFSTONFS(vp->v_mount);
1307 if (!nmp) {
1308 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1309 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1310 return (ENXIO);
1311 }
1312 verf_str = auth_str = (char *)0;
1313 if (nmp->nm_flag & NFSMNT_KERB) {
1314 verf_str = nickv;
1315 verf_len = sizeof (nickv);
1316 auth_type = RPCAUTH_KERB4;
1317 bzero((caddr_t)key, sizeof (key));
1318 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
1319 &auth_len, verf_str, verf_len)) {
1320 nmp = VFSTONFS(vp->v_mount);
1321 if (!nmp) {
1322 FSDBG_BOT(531, 2, vp, error, rep);
1323 FREE_ZONE((caddr_t)rep,
1324 sizeof (struct nfsreq), M_NFSREQ);
1325 m_freem(mrest);
1326 return (ENXIO);
1327 }
1328 error = nfs_getauth(nmp, rep, cred, &auth_str,
1329 &auth_len, verf_str, &verf_len, key);
1330 nmp = VFSTONFS(vp->v_mount);
1331 if (!error && !nmp)
1332 error = ENXIO;
1333 if (error) {
1334 FSDBG_BOT(531, 2, vp, error, rep);
1335 FREE_ZONE((caddr_t)rep,
1336 sizeof (struct nfsreq), M_NFSREQ);
1337 m_freem(mrest);
1338 return (error);
1339 }
1340 }
1341 } else {
1342 auth_type = RPCAUTH_UNIX;
1343 if (cred->cr_ngroups < 1)
1344 panic("nfsreq nogrps");
1345 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1346 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1347 5 * NFSX_UNSIGNED;
1348 }
1349 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1350 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1351 if (xidp)
1352 *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);
1353 if (auth_str)
1354 _FREE(auth_str, M_TEMP);
1355
1356 /*
1357 * For stream protocols, insert a Sun RPC Record Mark.
1358 */
1359 if (nmsotype == SOCK_STREAM) {
1360 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1361 *mtod(m, u_long *) = htonl(0x80000000 |
1362 (m->m_pkthdr.len - NFSX_UNSIGNED));
1363 }
1364 rep->r_mreq = m;
1365 rep->r_xid = xid;
1366 tryagain:
1367 nmp = VFSTONFS(vp->v_mount);
1368 if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
1369 rep->r_retry = nmp->nm_retry;
1370 else
1371 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1372 rep->r_rtt = rep->r_rexmit = 0;
1373 if (proct[procnum] > 0)
1374 rep->r_flags = R_TIMING;
1375 else
1376 rep->r_flags = 0;
1377 rep->r_mrep = NULL;
1378
1379 /*
1380 * Do the client side RPC.
1381 */
1382 nfsstats.rpcrequests++;
1383 /*
1384 * Chain request into list of outstanding requests. Be sure
1385 * to put it LAST so timer finds oldest requests first.
1386 */
1387 s = splsoftclock();
1388 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1389
1390 /* Get send time for nqnfs */
1391 microtime(&now);
1392 reqtime = now.tv_sec;
1393
1394 /*
1395 * If backing off another request or avoiding congestion, don't
1396 * send this one now but let timer do it. If not timing a request,
1397 * do it now.
1398 */
1399 if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1400 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1401 nmp->nm_sent < nmp->nm_cwnd)) {
1402 int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);
1403
1404 splx(s);
1405 if (connrequired)
1406 error = nfs_sndlock(rep);
1407
1408 /*
1409 * Set the R_SENT before doing the send in case another thread
1410 * processes the reply before the nfs_send returns here
1411 */
1412 if (!error) {
1413 if ((rep->r_flags & R_MUSTRESEND) == 0) {
1414 FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
1415 nmp->nm_cwnd);
1416 nmp->nm_sent += NFS_CWNDSCALE;
1417 rep->r_flags |= R_SENT;
1418 }
1419
1420 m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
1421 error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
1422 if (connrequired)
1423 nfs_sndunlock(rep);
1424 }
1425 nmp = VFSTONFS(vp->v_mount);
1426 if (error) {
1427 if (nmp)
1428 nmp->nm_sent -= NFS_CWNDSCALE;
1429 rep->r_flags &= ~R_SENT;
1430 }
1431 } else {
1432 splx(s);
1433 rep->r_rtt = -1;
1434 }
1435
1436 /*
1437 * Wait for the reply from our send or the timer's.
1438 */
1439 if (!error || error == EPIPE)
1440 error = nfs_reply(rep);
1441
1442 /*
1443 * RPC done, unlink the request.
1444 */
1445 nfs_repdequeue(rep);
1446
1447 nmp = VFSTONFS(vp->v_mount);
1448
1449 /*
1450 * Decrement the outstanding request count.
1451 */
1452 if (rep->r_flags & R_SENT) {
1453 rep->r_flags &= ~R_SENT; /* paranoia */
1454 if (nmp) {
1455 FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
1456 nmp->nm_sent -= NFS_CWNDSCALE;
1457 }
1458 }
1459
1460 /*
1461 * If there was a successful reply and a tprintf msg.
1462 * tprintf a response.
1463 */
1464 nfs_up(rep, "is alive again", error);
1465 mrep = rep->r_mrep;
1466 md = rep->r_md;
1467 dpos = rep->r_dpos;
1468 if (!error && !nmp)
1469 error = ENXIO;
1470 if (error) {
1471 m_freem(rep->r_mreq);
1472 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1473 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1474 return (error);
1475 }
1476
1477 /*
1478 * break down the rpc header and check if ok
1479 */
1480 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1481 if (*tl++ == rpc_msgdenied) {
1482 if (*tl == rpc_mismatch)
1483 error = EOPNOTSUPP;
1484 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1485 if (!failed_auth) {
1486 failed_auth++;
1487 mheadend->m_next = (struct mbuf *)0;
1488 m_freem(mrep);
1489 m_freem(rep->r_mreq);
1490 goto kerbauth;
1491 } else
1492 error = EAUTH;
1493 } else
1494 error = EACCES;
1495 m_freem(mrep);
1496 m_freem(rep->r_mreq);
1497 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1498 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1499 return (error);
1500 }
1501
1502 /*
1503 * Grab any Kerberos verifier, otherwise just throw it away.
1504 */
1505 verf_type = fxdr_unsigned(int, *tl++);
1506 i = fxdr_unsigned(int, *tl);
1507 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1508 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1509 if (error)
1510 goto nfsmout;
1511 } else if (i > 0)
1512 nfsm_adv(nfsm_rndup(i));
1513 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1514 /* 0 == ok */
1515 if (*tl == 0) {
1516 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1517 if (*tl != 0) {
1518 error = fxdr_unsigned(int, *tl);
1519 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1520 error == NFSERR_TRYLATER) {
1521 m_freem(mrep);
1522 error = 0;
1523 microuptime(&now);
1524 waituntil = now.tv_sec + trylater_delay;
1525 NFS_DPF(DUP,
1526 ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
1527 nmp->nm_mountp->mnt_stat.f_mntfromname,
1528 nmp->nm_flag, trylater_cnt, waituntil,
1529 trylater_delay));
1530 while (now.tv_sec < waituntil) {
1531 (void)tsleep((caddr_t)&lbolt,
1532 PSOCK, "nqnfstry", 0);
1533 microuptime(&now);
1534 }
1535 trylater_delay *= 2;
1536 if (trylater_delay > 60)
1537 trylater_delay = 60;
1538 if (trylater_cnt < 7)
1539 trylater_cnt++;
1540 goto tryagain;
1541 }
1542
1543 /*
1544 * If the File Handle was stale, invalidate the
1545 * lookup cache, just in case.
1546 */
1547 if (error == ESTALE)
1548 cache_purge(vp);
1549 if (nmp->nm_flag & NFSMNT_NFSV3) {
1550 *mrp = mrep;
1551 *mdp = md;
1552 *dposp = dpos;
1553 error |= NFSERR_RETERR;
1554 } else
1555 m_freem(mrep);
1556 m_freem(rep->r_mreq);
1557 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1558 FREE_ZONE((caddr_t)rep,
1559 sizeof (struct nfsreq), M_NFSREQ);
1560 return (error);
1561 }
1562
1563 /*
1564 * For nqnfs, get any lease in reply
1565 */
1566 if (nmp->nm_flag & NFSMNT_NQNFS) {
1567 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1568 if (*tl) {
1569 np = VTONFS(vp);
1570 nqlflag = fxdr_unsigned(int, *tl);
1571 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
1572 cachable = fxdr_unsigned(int, *tl++);
1573 reqtime += fxdr_unsigned(int, *tl++);
1574 microtime(&now);
1575 if (reqtime > now.tv_sec) {
1576 fxdr_hyper(tl, &frev);
1577 nqnfs_clientlease(nmp, np, nqlflag,
1578 cachable, reqtime, frev);
1579 }
1580 }
1581 }
1582 *mrp = mrep;
1583 *mdp = md;
1584 *dposp = dpos;
1585 m_freem(rep->r_mreq);
1586 FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
1587 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1588 return (0);
1589 }
1590 m_freem(mrep);
1591 error = EPROTONOSUPPORT;
1592 nfsmout:
1593 m_freem(rep->r_mreq);
1594 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1595 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1596 return (error);
1597 }
1598
1599 #ifndef NFS_NOSERVER
1600 /*
1601 * Generate the rpc reply header
1602 * siz arg. is used to decide if adding a cluster is worthwhile
1603 */
1604 int
1605 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1606 int siz;
1607 struct nfsrv_descript *nd;
1608 struct nfssvc_sock *slp;
1609 int err;
1610 int cache;
1611 u_quad_t *frev;
1612 struct mbuf **mrq;
1613 struct mbuf **mbp;
1614 caddr_t *bposp;
1615 {
1616 register u_long *tl;
1617 register struct mbuf *mreq;
1618 caddr_t bpos;
1619 struct mbuf *mb, *mb2;
1620
1621 MGETHDR(mreq, M_WAIT, MT_DATA);
1622 mb = mreq;
1623 /*
1624 * If this is a big reply, use a cluster else
1625 * try and leave leading space for the lower level headers.
1626 */
1627 siz += RPC_REPLYSIZ;
1628 if (siz >= MINCLSIZE) {
1629 MCLGET(mreq, M_WAIT);
1630 } else
1631 mreq->m_data += max_hdr;
1632 tl = mtod(mreq, u_long *);
1633 mreq->m_len = 6 * NFSX_UNSIGNED;
1634 bpos = ((caddr_t)tl) + mreq->m_len;
1635 *tl++ = txdr_unsigned(nd->nd_retxid);
1636 *tl++ = rpc_reply;
1637 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1638 *tl++ = rpc_msgdenied;
1639 if (err & NFSERR_AUTHERR) {
1640 *tl++ = rpc_autherr;
1641 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1642 mreq->m_len -= NFSX_UNSIGNED;
1643 bpos -= NFSX_UNSIGNED;
1644 } else {
1645 *tl++ = rpc_mismatch;
1646 *tl++ = txdr_unsigned(RPC_VER2);
1647 *tl = txdr_unsigned(RPC_VER2);
1648 }
1649 } else {
1650 *tl++ = rpc_msgaccepted;
1651
1652 /*
1653 * For Kerberos authentication, we must send the nickname
1654 * verifier back, otherwise just RPCAUTH_NULL.
1655 */
1656 if (nd->nd_flag & ND_KERBFULL) {
1657 register struct nfsuid *nuidp;
1658 struct timeval ktvin, ktvout;
1659
1660 for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1661 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1662 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1663 (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1664 &nuidp->nu_haddr, nd->nd_nam2)))
1665 break;
1666 }
1667 if (nuidp) {
1668 ktvin.tv_sec =
1669 txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1670 ktvin.tv_usec =
1671 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1672
1673 /*
1674 * Encrypt the timestamp in ecb mode using the
1675 * session key.
1676 */
1677 #if NFSKERB
1678 XXX
1679 #endif
1680
1681 *tl++ = rpc_auth_kerb;
1682 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1683 *tl = ktvout.tv_sec;
1684 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1685 *tl++ = ktvout.tv_usec;
1686 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1687 } else {
1688 *tl++ = 0;
1689 *tl++ = 0;
1690 }
1691 } else {
1692 *tl++ = 0;
1693 *tl++ = 0;
1694 }
1695 switch (err) {
1696 case EPROGUNAVAIL:
1697 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1698 break;
1699 case EPROGMISMATCH:
1700 *tl = txdr_unsigned(RPC_PROGMISMATCH);
1701 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1702 if (nd->nd_flag & ND_NQNFS) {
1703 *tl++ = txdr_unsigned(3);
1704 *tl = txdr_unsigned(3);
1705 } else {
1706 *tl++ = txdr_unsigned(2);
1707 *tl = txdr_unsigned(3);
1708 }
1709 break;
1710 case EPROCUNAVAIL:
1711 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1712 break;
1713 case EBADRPC:
1714 *tl = txdr_unsigned(RPC_GARBAGE);
1715 break;
1716 default:
1717 *tl = 0;
1718 if (err != NFSERR_RETVOID) {
1719 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1720 if (err)
1721 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1722 else
1723 *tl = 0;
1724 }
1725 break;
1726 };
1727 }
1728
1729 /*
1730 * For nqnfs, piggyback lease as requested.
1731 */
1732 if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1733 if (nd->nd_flag & ND_LEASE) {
1734 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1735 *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1736 *tl++ = txdr_unsigned(cache);
1737 *tl++ = txdr_unsigned(nd->nd_duration);
1738 txdr_hyper(frev, tl);
1739 } else {
1740 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1741 *tl = 0;
1742 }
1743 }
1744 if (mrq != NULL)
1745 *mrq = mreq;
1746 *mbp = mb;
1747 *bposp = bpos;
1748 if (err != 0 && err != NFSERR_RETVOID)
1749 nfsstats.srvrpc_errs++;
1750 return (0);
1751 }
1752
1753
1754 #endif /* NFS_NOSERVER */
1755
1756
1757 /*
1758 * From FreeBSD 1.58, a Matt Dillon fix...
1759 * Flag a request as being about to terminate.
1760 * The nm_sent count is decremented now to avoid deadlocks when the process
1761 * in soreceive() hasn't yet managed to send its own request.
1762 */
1763 static void
1764 nfs_softterm(struct nfsreq *rep)
1765 {
1766
1767 rep->r_flags |= R_SOFTTERM;
1768 if (rep->r_flags & R_SENT) {
1769 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1770 rep->r_nmp->nm_cwnd);
1771 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1772 rep->r_flags &= ~R_SENT;
1773 }
1774 }
1775
1776 void
1777 nfs_timer_funnel(arg)
1778 void * arg;
1779 {
1780 (void) thread_funnel_set(kernel_flock, TRUE);
1781 nfs_timer(arg);
1782 (void) thread_funnel_set(kernel_flock, FALSE);
1783
1784 }
1785
1786 /*
1787 * Ensure rep isn't in use by the timer, then dequeue it.
1788 */
1789 void
1790 nfs_repdequeue(struct nfsreq *rep)
1791 {
1792 int s;
1793
1794 while ((rep->r_flags & R_BUSY)) {
1795 rep->r_flags |= R_WAITING;
1796 tsleep(rep, PSOCK, "repdeq", 0);
1797 }
1798 s = splsoftclock();
1799 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1800 splx(s);
1801 }
1802
1803 /*
1804 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
1805 * free()'d out from under it.
1806 */
1807 void
1808 nfs_repbusy(struct nfsreq *rep)
1809 {
1810
1811 if ((rep->r_flags & R_BUSY))
1812 panic("rep locked");
1813 rep->r_flags |= R_BUSY;
1814 }
1815
1816 /*
1817 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
1818 */
1819 struct nfsreq *
1820 nfs_repnext(struct nfsreq *rep)
1821 {
1822 struct nfsreq * nextrep;
1823
1824 if (rep == NULL)
1825 return (NULL);
1826 /*
1827 * We need to get and busy the next req before signalling the
1828 * current one, otherwise wakeup() may block us and we'll race to
1829 * grab the next req.
1830 */
1831 nextrep = TAILQ_NEXT(rep, r_chain);
1832 if (nextrep != NULL)
1833 nfs_repbusy(nextrep);
1834 /* unbusy and signal. */
1835 rep->r_flags &= ~R_BUSY;
1836 if ((rep->r_flags & R_WAITING)) {
1837 rep->r_flags &= ~R_WAITING;
1838 wakeup(rep);
1839 }
1840 return (nextrep);
1841 }
1842
1843 /*
1844 * Nfs timer routine
1845 * Scan the nfsreq list and retransmit any requests that have timed out
1846 * To avoid retransmission attempts on STREAM sockets (in the future) make
1847 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 *
 * The request queue (nfs_reqq) is walked with nfs_repbusy()/nfs_repnext(),
 * so the entry being examined is always marked R_BUSY.  For each entry:
 *  - entries with no mount, with a reply already attached, already marked
 *    R_SOFTTERM, or for which nfs_sigintr() returns non-zero are skipped;
 *  - a "not responding" message is issued via nfs_down() when the
 *    tprintf delay has elapsed;
 *  - requests that have used up r_retry retransmits are soft-terminated;
 *  - on non-datagram sockets only r_rexmit is advanced, nothing is resent;
 *  - on datagram sockets a copy of r_mreq is resent if the send buffer
 *    has room and the congestion window allows it.
 * When the server is compiled in, the nqnfs lease timer is then run (at
 * most once per second) and nfsds are woken for due gathered writes.
 * Finally the routine re-arms itself through timeout().
1848 */
1849 void
1850 nfs_timer(arg)
1851 void *arg; /* never used */
1852 {
1853 register struct nfsreq *rep;
1854 register struct mbuf *m;
1855 register struct socket *so;
1856 register struct nfsmount *nmp;
1857 register int timeo;
1858 int s, error;
1859 #ifndef NFS_NOSERVER
1860 static long lasttime = 0;
1861 register struct nfssvc_sock *slp;
1862 u_quad_t cur_usec;
1863 #endif /* NFS_NOSERVER */
1864 #if NFSDIAG
1865 int rttdiag;
1866 #endif
1867 int flags, rexmit, cwnd, sent;
1868 u_long xid;
1869 struct timeval now;
1870
1871 s = splnet();
1872 /*
1873 * XXX If preemptable threads are implemented the spls used for the
1874 * outstanding request queue must be replaced with mutexes.
1875 */
1876 #ifdef NFSTRACESUSPENDERS
1877 if (NFSTRACE_SUSPENDING) {
1878 TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
1879 if (rep->r_xid == nfstracexid)
1880 break;
1881 if (!rep) {
1882 NFSTRACE_RESUME;
1883 } else if (NFSTRACE_SUSPENSEOVER) {
1884 NFSTRACE_SUSPEND;
1885 }
1886 }
1887 #endif
/* busy the head of the queue; nfs_repnext() moves the mark along */
1888 rep = TAILQ_FIRST(&nfs_reqq);
1889 if (rep != NULL)
1890 nfs_repbusy(rep);
1891 microuptime(&now);
1892 for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
1893 #ifdef NFSTRACESUSPENDERS
1894 if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
1895 nfstracexid = rep->r_xid;
1896 NFSTRACE_STARTSUSPENDCOUNTDOWN;
1897 }
1898 #endif
1899 nmp = rep->r_nmp;
1900 if (!nmp) /* unmounted */
1901 continue;
/* already answered or already given up on */
1902 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1903 continue;
1904 if (nfs_sigintr(nmp, rep, rep->r_procp))
1905 continue;
/*
 * Report an unresponsive server, but no more often than once per
 * nm_tprintf_delay seconds for this request.
 */
1906 if (nmp->nm_tprintf_initial_delay != 0 &&
1907 (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
1908 rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
1909 rep->r_lastmsg = now.tv_sec;
1910 nfs_down(rep, "not responding", 0);
1911 }
/*
 * A negative r_rtt (see the -1 assignment further down) skips the
 * timeout computation and goes straight to the resend checks.
 */
1912 if (rep->r_rtt >= 0) {
1913 rep->r_rtt++;
1914 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1915 timeo = nmp->nm_timeo;
1916 else
1917 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1918 /* ensure 62.5 ms floor */
1919 while (16 * timeo < hz)
1920 timeo *= 2;
1921 if (nmp->nm_timeouts > 0)
1922 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1923 if (rep->r_rtt <= timeo)
1924 continue;
1925 if (nmp->nm_timeouts < 8)
1926 nmp->nm_timeouts++;
1927 }
1928 /*
1929 * Check for too many retransmits. This is never true for
1930 * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
1931 * and never allow r_rexmit to be more than NFS_MAXREXMIT.
1932 */
1933 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1934 nfsstats.rpctimeouts++;
1935 nfs_softterm(rep);
1936 continue;
1937 }
/* non-datagram sockets: count the timeout but do not resend here */
1938 if (nmp->nm_sotype != SOCK_DGRAM) {
1939 if (++rep->r_rexmit > NFS_MAXREXMIT)
1940 rep->r_rexmit = NFS_MAXREXMIT;
1941 continue;
1942 }
1943 if ((so = nmp->nm_so) == NULL)
1944 continue;
1945
1946 /*
1947 * If there is enough space and the window allows..
1948 * Resend it
1949 * Set r_rtt to -1 in case we fail to send it now.
1950 */
1951 #if NFSDIAG
1952 rttdiag = rep->r_rtt;
1953 #endif
1954 rep->r_rtt = -1;
1955 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1956 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1957 (rep->r_flags & R_SENT) ||
1958 nmp->nm_sent < nmp->nm_cwnd) &&
1959 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1960
1961 struct proc *p = current_proc();
1962
1963 #if NFSDIAG
1964 if (rep->r_flags & R_SENT && nfsprnttimo &&
1965 nmp->nm_timeouts >= nfsprnttimo) {
1966 int t = proct[rep->r_procnum];
1967 if (t)
1968 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1]));
1969 else
1970 NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum));
1971 }
1972 nfsdup(rep);
1973 #endif /* NFSDIAG */
1974 /*
1975 * Iff first send, start timing
1976 * else turn timing off, backoff timer
1977 * and divide congestion window by 2.
1978 * We update these *before* the send to avoid
1979 * racing against receiving the reply.
1980 * We save them so we can restore them on send error.
1981 */
1982 flags = rep->r_flags;
1983 rexmit = rep->r_rexmit;
1984 cwnd = nmp->nm_cwnd;
1985 sent = nmp->nm_sent;
1986 xid = rep->r_xid;
1987 if (rep->r_flags & R_SENT) {
1988 rep->r_flags &= ~R_TIMING;
1989 if (++rep->r_rexmit > NFS_MAXREXMIT)
1990 rep->r_rexmit = NFS_MAXREXMIT;
1991 nmp->nm_cwnd >>= 1;
1992 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1993 nmp->nm_cwnd = NFS_CWNDSCALE;
1994 nfsstats.rpcretries++;
1995 } else {
1996 rep->r_flags |= R_SENT;
1997 nmp->nm_sent += NFS_CWNDSCALE;
1998 }
1999 FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
2000
/* the protocol send routine is called under the network funnel */
2001 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2002
/* NFSMNT_NOCONN mounts pass the server address with every send */
2003 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
2004 error = (*so->so_proto->pr_usrreqs->pru_send)
2005 (so, 0, m, 0, 0, p);
2006 else
2007 error = (*so->so_proto->pr_usrreqs->pru_send)
2008 (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p);
2009
2010 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2011
2012 FSDBG(535, xid, error, sent, cwnd);
2013
/*
 * On failure put back the values saved above and flag the request
 * with R_RESENDERR; r_rtt stays at -1.  On success start timing
 * the request again from zero.
 */
2014 if (error) {
2015 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
2016 so->so_error = 0;
2017 rep->r_flags = flags | R_RESENDERR;
2018 rep->r_rexmit = rexmit;
2019 nmp->nm_cwnd = cwnd;
2020 nmp->nm_sent = sent;
2021 if (flags & R_SENT)
2022 nfsstats.rpcretries--;
2023 } else
2024 rep->r_rtt = 0;
2025 }
2026 }
2027 #ifndef NFS_NOSERVER
2028 /*
2029 * Call the nqnfs server timer once a second to handle leases.
2030 */
2031 microuptime(&now);
2032 if (lasttime != now.tv_sec) {
2033 lasttime = now.tv_sec;
2034 nqnfs_serverd();
2035 }
2036
2037 /*
2038 * Scan the write gathering queues for writes that need to be
2039 * completed now.
2040 */
2041 cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
2042 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
2043 if (LIST_FIRST(&slp->ns_tq) &&
2044 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
2045 nfsrv_wakenfsd(slp);
2046 }
2047 #endif /* NFS_NOSERVER */
2048 splx(s);
/* re-arm: run again through nfs_timer_funnel() after nfs_ticks */
2049 timeout(nfs_timer_funnel, (void *)0, nfs_ticks);
2050
2051 }
2052
2053
2054 /*
2055 * Test for a termination condition pending on the process.
2056 * This is used to determine if we need to bail on a mount.
2057 * EIO is returned if there has been a soft timeout.
2058 * EINTR is returned if there is a signal pending that is not being ignored
2059 * and the mount is interruptable, or if we are a thread that is in the process
2060 * of cancellation (also SIGKILL posted).
2061 */
2062 int
2063 nfs_sigintr(nmp, rep, p)
2064 struct nfsmount *nmp;
2065 struct nfsreq *rep;
2066 struct proc *p;
2067 {
2068 struct uthread *curr_td;
2069 sigset_t pending_sigs;
2070 int context_good = 0;
2071 struct nfsmount *repnmp;
2072
2073 if (nmp == NULL)
2074 return (ENXIO);
2075 if (rep != NULL) {
2076 repnmp = rep->r_nmp;
2077 /* we've had a forced unmount. */
2078 if (repnmp == NULL)
2079 return (ENXIO);
2080 /* request has timed out on a 'soft' mount. */
2081 if (rep->r_flags & R_SOFTTERM)
2082 return (EIO);
2083 /*
2084 * We're in the progress of a force unmount and there's
2085 * been a timeout we're dead and fail IO.
2086 */
2087 if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
2088 (NFSSTA_FORCE|NFSSTA_TIMEO))
2089 return (EIO);
2090 /* Someone is unmounting us, go soft and mark it. */
2091 if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
2092 repnmp->nm_flag |= NFSMNT_SOFT;
2093 nmp->nm_state |= NFSSTA_FORCE;
2094 }
2095 /*
2096 * If the mount is hung and we've requested not to hang
2097 * on remote filesystems, then bail now.
2098 */
2099 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
2100 (repnmp->nm_state & NFSSTA_TIMEO) != 0)
2101 return (EIO);
2102 }
2103 /* XXX: is this valid? this probably should be an assertion. */
2104 if (p == NULL)
2105 return (0);
2106
2107 /*
2108 * XXX: Since nfs doesn't have a good shot at getting the current
2109 * thread we take a guess. (only struct proc * are passed to VOPs)
2110 * What we do is look at the current thread, if it belongs to the
2111 * passed in proc pointer then we have a "good/accurate" context
2112 * and can make an accurate guess as to what to do.
2113 * However if we have a bad context we have to make due with what
2114 * is in the proc struct which may not be as up to date as we'd
2115 * like.
2116 * This is ok because the process will call us with the correct
2117 * context after a short timeout while waiting for a response.
2118 */
2119 curr_td = (struct uthread *)get_bsdthread_info(current_act());
2120 if (curr_td->uu_proc == p)
2121 context_good = 1;
2122 if (context_good && current_thread_aborted())
2123 return (EINTR);
2124 /* mask off thread and process blocked signals. */
2125 if (context_good)
2126 pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
2127 else
2128 pending_sigs = p->p_siglist;
2129 /* mask off process level and NFS ignored signals. */
2130 pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
2131 if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
2132 return (EINTR);
2133 return (0);
2134 }
2135
2136 /*
2137 * Lock a socket against others.
2138 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
2139 * and also to avoid race conditions between the processes with nfs requests
2140 * in progress when a reconnect is necessary.
2141 */
2142 int
2143 nfs_sndlock(rep)
2144 struct nfsreq *rep;
2145 {
2146 register int *statep;
2147 struct proc *p;
2148 int error, slpflag = 0, slptimeo = 0;
2149
2150 if (rep->r_nmp == NULL)
2151 return (ENXIO);
2152 statep = &rep->r_nmp->nm_state;
2153
2154 p = rep->r_procp;
2155 if (rep->r_nmp->nm_flag & NFSMNT_INT)
2156 slpflag = PCATCH;
2157 while (*statep & NFSSTA_SNDLOCK) {
2158 error = nfs_sigintr(rep->r_nmp, rep, p);
2159 if (error)
2160 return (error);
2161 *statep |= NFSSTA_WANTSND;
2162 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
2163 slptimeo = hz;
2164 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2165 "nfsndlck", slptimeo);
2166 if (slpflag == PCATCH) {
2167 slpflag = 0;
2168 slptimeo = 2 * hz;
2169 }
2170 /*
2171 * Make sure while we slept that the mountpoint didn't go away.
2172 * nfs_sigintr and callers expect it in tact.
2173 */
2174 if (!rep->r_nmp)
2175 return (ENXIO); /* don't have lock until out of loop */
2176 }
2177 *statep |= NFSSTA_SNDLOCK;
2178 return (0);
2179 }
2180
2181 /*
2182 * Unlock the stream socket for others.
2183 */
2184 void
2185 nfs_sndunlock(rep)
2186 struct nfsreq *rep;
2187 {
2188 register int *statep;
2189
2190 if (rep->r_nmp == NULL)
2191 return;
2192 statep = &rep->r_nmp->nm_state;
2193 if ((*statep & NFSSTA_SNDLOCK) == 0)
2194 panic("nfs sndunlock");
2195 *statep &= ~NFSSTA_SNDLOCK;
2196 if (*statep & NFSSTA_WANTSND) {
2197 *statep &= ~NFSSTA_WANTSND;
2198 wakeup((caddr_t)statep);
2199 }
2200 }
2201
2202 static int
2203 nfs_rcvlock(rep)
2204 register struct nfsreq *rep;
2205 {
2206 register int *statep;
2207 int error, slpflag, slptimeo = 0;
2208
2209 /* make sure we still have our mountpoint */
2210 if (!rep->r_nmp) {
2211 if (rep->r_mrep != NULL)
2212 return (EALREADY);
2213 return (ENXIO);
2214 }
2215
2216 statep = &rep->r_nmp->nm_state;
2217 FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
2218 if (rep->r_nmp->nm_flag & NFSMNT_INT)
2219 slpflag = PCATCH;
2220 else
2221 slpflag = 0;
2222 while (*statep & NFSSTA_RCVLOCK) {
2223 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
2224 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
2225 return (error);
2226 } else if (rep->r_mrep != NULL) {
2227 /*
2228 * Don't bother sleeping if reply already arrived
2229 */
2230 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
2231 return (EALREADY);
2232 }
2233 FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
2234 *statep |= NFSSTA_WANTRCV;
2235 /*
2236 * We need to poll if we're P_NOREMOTEHANG so that we
2237 * call nfs_sigintr periodically above.
2238 */
2239 if (rep->r_procp != NULL &&
2240 (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
2241 slptimeo = hz;
2242 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2243 "nfsrcvlk", slptimeo);
2244 if (slpflag == PCATCH) {
2245 slpflag = 0;
2246 slptimeo = 2 * hz;
2247 }
2248 /*
2249 * Make sure while we slept that the mountpoint didn't go away.
2250 * nfs_sigintr and caller nfs_reply expect it intact.
2251 */
2252 if (!rep->r_nmp) {
2253 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
2254 return (ENXIO); /* don't have lock until out of loop */
2255 }
2256 }
2257 /*
2258 * nfs_reply will handle it if reply already arrived.
2259 * (We may have slept or been preempted while on network funnel).
2260 */
2261 FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
2262 *statep |= NFSSTA_RCVLOCK;
2263 return (0);
2264 }
2265
2266 /*
2267 * Unlock the stream socket for others.
2268 */
2269 static void
2270 nfs_rcvunlock(rep)
2271 register struct nfsreq *rep;
2272 {
2273 register int *statep;
2274
2275 if (rep->r_nmp == NULL)
2276 return;
2277 statep = &rep->r_nmp->nm_state;
2278
2279 FSDBG(533, statep, *statep, 0, 0);
2280 if ((*statep & NFSSTA_RCVLOCK) == 0)
2281 panic("nfs rcvunlock");
2282 *statep &= ~NFSSTA_RCVLOCK;
2283 if (*statep & NFSSTA_WANTRCV) {
2284 *statep &= ~NFSSTA_WANTRCV;
2285 wakeup((caddr_t)statep);
2286 }
2287 }
2288
2289
2290 #ifndef NFS_NOSERVER
2291 /*
2292 * Socket upcall routine for the nfsd sockets.
2293 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2294 * Essentially do as much as possible non-blocking, else punt and it will
2295 * be called with M_WAIT from an nfsd.
2296 */
2297 /*
2298 * Needs to run under network funnel
2299 */
2300 void
2301 nfsrv_rcv(so, arg, waitflag)
2302 struct socket *so;
2303 caddr_t arg;
2304 int waitflag;
2305 {
2306 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2307 register struct mbuf *m;
2308 struct mbuf *mp, *mhck;
2309 struct sockaddr *nam=0;
2310 struct uio auio;
2311 int flags, ns_nflag=0, error;
2312 struct sockaddr_in *sin;
2313
2314 if ((slp->ns_flag & SLP_VALID) == 0)
2315 return;
2316 #ifdef notdef
2317 /*
2318 * Define this to test for nfsds handling this under heavy load.
2319 */
2320 if (waitflag == M_DONTWAIT) {
2321 ns_nflag = SLPN_NEEDQ;
2322 goto dorecs;
2323 }
2324 #endif
2325 auio.uio_procp = NULL;
2326 if (so->so_type == SOCK_STREAM) {
2327 /*
2328 * If there are already records on the queue, defer soreceive()
2329 * to an nfsd so that there is feedback to the TCP layer that
2330 * the nfs servers are heavily loaded.
2331 */
2332 if (slp->ns_rec && waitflag == M_DONTWAIT) {
2333 ns_nflag = SLPN_NEEDQ;
2334 goto dorecs;
2335 }
2336
2337 /*
2338 * Do soreceive().
2339 */
2340 auio.uio_resid = 1000000000;
2341 flags = MSG_DONTWAIT;
2342 error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
2343 if (error || mp == (struct mbuf *)0) {
2344 if (error == EWOULDBLOCK)
2345 ns_nflag = SLPN_NEEDQ;
2346 else
2347 ns_nflag = SLPN_DISCONN;
2348 goto dorecs;
2349 }
2350 m = mp;
2351 if (slp->ns_rawend) {
2352 slp->ns_rawend->m_next = m;
2353 slp->ns_cc += 1000000000 - auio.uio_resid;
2354 } else {
2355 slp->ns_raw = m;
2356 slp->ns_cc = 1000000000 - auio.uio_resid;
2357 }
2358 while (m->m_next)
2359 m = m->m_next;
2360 slp->ns_rawend = m;
2361
2362 /*
2363 * Now try and parse record(s) out of the raw stream data.
2364 */
2365 error = nfsrv_getstream(slp, waitflag);
2366 if (error) {
2367 if (error == EPERM)
2368 ns_nflag = SLPN_DISCONN;
2369 else
2370 ns_nflag = SLPN_NEEDQ;
2371 }
2372 } else {
2373 do {
2374 auio.uio_resid = 1000000000;
2375 flags = MSG_DONTWAIT;
2376 nam = 0;
2377 error = soreceive(so, &nam, &auio, &mp,
2378 (struct mbuf **)0, &flags);
2379
2380 if (mp) {
2381 if (nam) {
2382 MGET(mhck, M_WAIT, MT_SONAME);
2383 mhck->m_len = nam->sa_len;
2384 sin = mtod(mhck, struct sockaddr_in *);
2385 bcopy(nam, sin, sizeof(struct sockaddr_in));
2386 mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
2387 FREE(nam, M_SONAME);
2388
2389 m = mhck;
2390 m->m_next = mp;
2391 } else
2392 m = mp;
2393 if (slp->ns_recend)
2394 slp->ns_recend->m_nextpkt = m;
2395 else
2396 slp->ns_rec = m;
2397 slp->ns_recend = m;
2398 m->m_nextpkt = (struct mbuf *)0;
2399 }
2400 if (error) {
2401 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2402 && error != EWOULDBLOCK) {
2403 ns_nflag = SLPN_DISCONN;
2404 goto dorecs;
2405 }
2406 }
2407 } while (mp);
2408 }
2409
2410 /*
2411 * Now try and process the request records, non-blocking.
2412 */
2413 dorecs:
2414 if (ns_nflag)
2415 slp->ns_nflag |= ns_nflag;
2416 if (waitflag == M_DONTWAIT &&
2417 (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
2418 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2419 nfsrv_wakenfsd(slp);
2420 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2421 }
2422 }
2423
2424 /*
2425 * Try and extract an RPC request from the mbuf data list received on a
2426 * stream socket. The "waitflag" argument indicates whether or not it
2427 * can sleep.
2428 */
2429 static int
2430 nfsrv_getstream(slp, waitflag)
2431 register struct nfssvc_sock *slp;
2432 int waitflag;
2433 {
2434 register struct mbuf *m, **mpp;
2435 register char *cp1, *cp2;
2436 register int len;
2437 struct mbuf *om, *m2, *recm;
2438 u_long recmark;
2439
2440 if (slp->ns_nflag & SLPN_GETSTREAM)
2441 panic("nfs getstream");
2442 slp->ns_nflag |= SLPN_GETSTREAM;
2443 for (;;) {
2444 if (slp->ns_reclen == 0) {
2445 if (slp->ns_cc < NFSX_UNSIGNED) {
2446 slp->ns_nflag &= ~SLPN_GETSTREAM;
2447 return (0);
2448 }
2449 m = slp->ns_raw;
2450 if (m->m_len >= NFSX_UNSIGNED) {
2451 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
2452 m->m_data += NFSX_UNSIGNED;
2453 m->m_len -= NFSX_UNSIGNED;
2454 } else {
2455 cp1 = (caddr_t)&recmark;
2456 cp2 = mtod(m, caddr_t);
2457 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2458 while (m->m_len == 0) {
2459 m = m->m_next;
2460 cp2 = mtod(m, caddr_t);
2461 }
2462 *cp1++ = *cp2++;
2463 m->m_data++;
2464 m->m_len--;
2465 }
2466 }
2467 slp->ns_cc -= NFSX_UNSIGNED;
2468 recmark = ntohl(recmark);
2469 slp->ns_reclen = recmark & ~0x80000000;
2470 if (recmark & 0x80000000)
2471 slp->ns_nflag |= SLPN_LASTFRAG;
2472 else
2473 slp->ns_nflag &= ~SLPN_LASTFRAG;
2474 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2475 slp->ns_nflag &= ~SLPN_GETSTREAM;
2476 return (EPERM);
2477 }
2478 }
2479
2480 /*
2481 * Now get the record part.
2482 *
2483 * Note that slp->ns_reclen may be 0. Linux sometimes
2484 * generates 0-length RPCs
2485 */
2486 recm = NULL;
2487 if (slp->ns_cc == slp->ns_reclen) {
2488 recm = slp->ns_raw;
2489 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2490 slp->ns_cc = slp->ns_reclen = 0;
2491 } else if (slp->ns_cc > slp->ns_reclen) {
2492 len = 0;
2493 m = slp->ns_raw;
2494 om = (struct mbuf *)0;
2495 while (len < slp->ns_reclen) {
2496 if ((len + m->m_len) > slp->ns_reclen) {
2497 m2 = m_copym(m, 0, slp->ns_reclen - len,
2498 waitflag);
2499 if (m2) {
2500 if (om) {
2501 om->m_next = m2;
2502 recm = slp->ns_raw;
2503 } else
2504 recm = m2;
2505 m->m_data += slp->ns_reclen - len;
2506 m->m_len -= slp->ns_reclen - len;
2507 len = slp->ns_reclen;
2508 } else {
2509 slp->ns_nflag &= ~SLPN_GETSTREAM;
2510 return (EWOULDBLOCK);
2511 }
2512 } else if ((len + m->m_len) == slp->ns_reclen) {
2513 om = m;
2514 len += m->m_len;
2515 m = m->m_next;
2516 recm = slp->ns_raw;
2517 om->m_next = (struct mbuf *)0;
2518 } else {
2519 om = m;
2520 len += m->m_len;
2521 m = m->m_next;
2522 }
2523 }
2524 slp->ns_raw = m;
2525 slp->ns_cc -= len;
2526 slp->ns_reclen = 0;
2527 } else {
2528 slp->ns_nflag &= ~SLPN_GETSTREAM;
2529 return (0);
2530 }
2531
2532 /*
2533 * Accumulate the fragments into a record.
2534 */
2535 mpp = &slp->ns_frag;
2536 while (*mpp)
2537 mpp = &((*mpp)->m_next);
2538 *mpp = recm;
2539 if (slp->ns_nflag & SLPN_LASTFRAG) {
2540 if (slp->ns_recend)
2541 slp->ns_recend->m_nextpkt = slp->ns_frag;
2542 else
2543 slp->ns_rec = slp->ns_frag;
2544 slp->ns_recend = slp->ns_frag;
2545 slp->ns_frag = (struct mbuf *)0;
2546 }
2547 }
2548 }
2549
2550 /*
2551 * Parse an RPC header.
2552 */
2553 int
2554 nfsrv_dorec(slp, nfsd, ndp)
2555 register struct nfssvc_sock *slp;
2556 struct nfsd *nfsd;
2557 struct nfsrv_descript **ndp;
2558 {
2559 register struct mbuf *m;
2560 register struct mbuf *nam;
2561 register struct nfsrv_descript *nd;
2562 int error;
2563
2564 *ndp = NULL;
2565 if ((slp->ns_flag & SLP_VALID) == 0 ||
2566 (m = slp->ns_rec) == (struct mbuf *)0)
2567 return (ENOBUFS);
2568 slp->ns_rec = m->m_nextpkt;
2569 if (slp->ns_rec)
2570 m->m_nextpkt = (struct mbuf *)0;
2571 else
2572 slp->ns_recend = (struct mbuf *)0;
2573 if (m->m_type == MT_SONAME) {
2574 nam = m;
2575 m = m->m_next;
2576 nam->m_next = NULL;
2577 } else
2578 nam = NULL;
2579 MALLOC_ZONE(nd, struct nfsrv_descript *,
2580 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2581 nd->nd_md = nd->nd_mrep = m;
2582 nd->nd_nam2 = nam;
2583 nd->nd_dpos = mtod(m, caddr_t);
2584 error = nfs_getreq(nd, nfsd, TRUE);
2585 if (error) {
2586 if (nam)
2587 m_freem(nam);
2588 FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
2589 return (error);
2590 }
2591 *ndp = nd;
2592 nfsd->nfsd_nd = nd;
2593 return (0);
2594 }
2595
2596 /*
2597 * Parse an RPC request
2598 * - verify it
2599 * - fill in the cred struct.
2600 */
2601 int
2602 nfs_getreq(nd, nfsd, has_header)
2603 register struct nfsrv_descript *nd;
2604 struct nfsd *nfsd;
2605 int has_header;
2606 {
2607 register int len, i;
2608 register u_long *tl;
2609 register long t1;
2610 struct uio uio;
2611 struct iovec iov;
2612 caddr_t dpos, cp2, cp;
2613 u_long nfsvers, auth_type;
2614 uid_t nickuid;
2615 int error = 0, nqnfs = 0, ticklen;
2616 struct mbuf *mrep, *md;
2617 register struct nfsuid *nuidp;
2618 struct timeval tvin, tvout, now;
2619 #if 0 /* until encrypted keys are implemented */
2620 NFSKERBKEYSCHED_T keys; /* stores key schedule */
2621 #endif
2622
2623 mrep = nd->nd_mrep;
2624 md = nd->nd_md;
2625 dpos = nd->nd_dpos;
2626 if (has_header) {
2627 nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
2628 nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
2629 if (*tl++ != rpc_call) {
2630 m_freem(mrep);
2631 return (EBADRPC);
2632 }
2633 } else
2634 nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
2635 nd->nd_repstat = 0;
2636 nd->nd_flag = 0;
2637 if (*tl++ != rpc_vers) {
2638 nd->nd_repstat = ERPCMISMATCH;
2639 nd->nd_procnum = NFSPROC_NOOP;
2640 return (0);
2641 }
2642 if (*tl != nfs_prog) {
2643 if (*tl == nqnfs_prog)
2644 nqnfs++;
2645 else {
2646 nd->nd_repstat = EPROGUNAVAIL;
2647 nd->nd_procnum = NFSPROC_NOOP;
2648 return (0);
2649 }
2650 }
2651 tl++;
2652 nfsvers = fxdr_unsigned(u_long, *tl++);
2653 if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
2654 (nfsvers != NQNFS_VER3 && nqnfs)) {
2655 nd->nd_repstat = EPROGMISMATCH;
2656 nd->nd_procnum = NFSPROC_NOOP;
2657 return (0);
2658 }
2659 if (nqnfs)
2660 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
2661 else if (nfsvers == NFS_VER3)
2662 nd->nd_flag = ND_NFSV3;
2663 nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
2664 if (nd->nd_procnum == NFSPROC_NULL)
2665 return (0);
2666 if (nd->nd_procnum >= NFS_NPROCS ||
2667 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2668 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2669 nd->nd_repstat = EPROCUNAVAIL;
2670 nd->nd_procnum = NFSPROC_NOOP;
2671 return (0);
2672 }
2673 if ((nd->nd_flag & ND_NFSV3) == 0)
2674 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2675 auth_type = *tl++;
2676 len = fxdr_unsigned(int, *tl++);
2677 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2678 m_freem(mrep);
2679 return (EBADRPC);
2680 }
2681
2682 nd->nd_flag &= ~ND_KERBAUTH;
2683 /*
2684 * Handle auth_unix or auth_kerb.
2685 */
2686 if (auth_type == rpc_auth_unix) {
2687 len = fxdr_unsigned(int, *++tl);
2688 if (len < 0 || len > NFS_MAXNAMLEN) {
2689 m_freem(mrep);
2690 return (EBADRPC);
2691 }
2692 nfsm_adv(nfsm_rndup(len));
2693 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2694 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2695 nd->nd_cr.cr_ref = 1;
2696 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2697 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2698 len = fxdr_unsigned(int, *tl);
2699 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
2700 m_freem(mrep);
2701 return (EBADRPC);
2702 }
2703 nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
2704 for (i = 1; i <= len; i++)
2705 if (i < NGROUPS)
2706 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2707 else
2708 tl++;
2709 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2710 if (nd->nd_cr.cr_ngroups > 1)
2711 nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
2712 len = fxdr_unsigned(int, *++tl);
2713 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2714 m_freem(mrep);
2715 return (EBADRPC);
2716 }
2717 if (len > 0)
2718 nfsm_adv(nfsm_rndup(len));
2719 } else if (auth_type == rpc_auth_kerb) {
2720 switch (fxdr_unsigned(int, *tl++)) {
2721 case RPCAKN_FULLNAME:
2722 ticklen = fxdr_unsigned(int, *tl);
2723 *((u_long *)nfsd->nfsd_authstr) = *tl;
2724 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2725 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2726 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
2727 m_freem(mrep);
2728 return (EBADRPC);
2729 }
2730 uio.uio_offset = 0;
2731 uio.uio_iov = &iov;
2732 uio.uio_iovcnt = 1;
2733 uio.uio_segflg = UIO_SYSSPACE;
2734 iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2735 iov.iov_len = RPCAUTH_MAXSIZ - 4;
2736 nfsm_mtouio(&uio, uio.uio_resid);
2737 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2738 if (*tl++ != rpc_auth_kerb ||
2739 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2740 printf("Bad kerb verifier\n");
2741 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2742 nd->nd_procnum = NFSPROC_NOOP;
2743 return (0);
2744 }
2745 nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2746 tl = (u_long *)cp;
2747 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2748 printf("Not fullname kerb verifier\n");
2749 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2750 nd->nd_procnum = NFSPROC_NOOP;
2751 return (0);
2752 }
2753 cp += NFSX_UNSIGNED;
2754 bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2755 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2756 nd->nd_flag |= ND_KERBFULL;
2757 nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2758 break;
2759 case RPCAKN_NICKNAME:
2760 if (len != 2 * NFSX_UNSIGNED) {
2761 printf("Kerb nickname short\n");
2762 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2763 nd->nd_procnum = NFSPROC_NOOP;
2764 return (0);
2765 }
2766 nickuid = fxdr_unsigned(uid_t, *tl);
2767 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2768 if (*tl++ != rpc_auth_kerb ||
2769 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2770 printf("Kerb nick verifier bad\n");
2771 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2772 nd->nd_procnum = NFSPROC_NOOP;
2773 return (0);
2774 }
2775 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2776 tvin.tv_sec = *tl++;
2777 tvin.tv_usec = *tl;
2778
2779 for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2780 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2781 if (nuidp->nu_cr.cr_uid == nickuid &&
2782 (!nd->nd_nam2 ||
2783 netaddr_match(NU_NETFAM(nuidp),
2784 &nuidp->nu_haddr, nd->nd_nam2)))
2785 break;
2786 }
2787 if (!nuidp) {
2788 nd->nd_repstat =
2789 (NFSERR_AUTHERR|AUTH_REJECTCRED);
2790 nd->nd_procnum = NFSPROC_NOOP;
2791 return (0);
2792 }
2793
2794 /*
2795 * Now, decrypt the timestamp using the session key
2796 * and validate it.
2797 */
2798 #if NFSKERB
2799 XXX
2800 #endif
2801
2802 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2803 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2804 microtime(&now);
2805 if (nuidp->nu_expire < now.tv_sec ||
2806 nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2807 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2808 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2809 nuidp->nu_expire = 0;
2810 nd->nd_repstat =
2811 (NFSERR_AUTHERR|AUTH_REJECTVERF);
2812 nd->nd_procnum = NFSPROC_NOOP;
2813 return (0);
2814 }
2815 nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2816 nd->nd_flag |= ND_KERBNICK;
2817 };
2818 } else {
2819 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2820 nd->nd_procnum = NFSPROC_NOOP;
2821 return (0);
2822 }
2823
2824 /*
2825 * For nqnfs, get piggybacked lease request.
2826 */
2827 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
2828 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2829 nd->nd_flag |= fxdr_unsigned(int, *tl);
2830 if (nd->nd_flag & ND_LEASE) {
2831 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2832 nd->nd_duration = fxdr_unsigned(int, *tl);
2833 } else
2834 nd->nd_duration = NQ_MINLEASE;
2835 } else
2836 nd->nd_duration = NQ_MINLEASE;
2837 nd->nd_md = md;
2838 nd->nd_dpos = dpos;
2839 return (0);
2840 nfsmout:
2841 return (error);
2842 }
2843
2844 /*
2845 * Search for a sleeping nfsd and wake it up.
2846 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2847 * running nfsds will go look for the work in the nfssvc_sock list.
2848 */
2849 void
2850 nfsrv_wakenfsd(slp)
2851 struct nfssvc_sock *slp;
2852 {
2853 register struct nfsd *nd;
2854
2855 if ((slp->ns_flag & SLP_VALID) == 0)
2856 return;
2857 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
2858 if (nd->nfsd_flag & NFSD_WAITING) {
2859 nd->nfsd_flag &= ~NFSD_WAITING;
2860 if (nd->nfsd_slp)
2861 panic("nfsd wakeup");
2862 slp->ns_sref++;
2863 nd->nfsd_slp = slp;
2864 wakeup((caddr_t)nd);
2865 return;
2866 }
2867 }
2868 slp->ns_flag |= SLP_DOREC;
2869 nfsd_head_flag |= NFSD_CHECKSLP;
2870 }
2871 #endif /* NFS_NOSERVER */
2872
2873 static int
2874 nfs_msg(p, server, msg, error)
2875 struct proc *p;
2876 const char *server, *msg;
2877 int error;
2878 {
2879 tpr_t tpr;
2880
2881 if (p)
2882 tpr = tprintf_open(p);
2883 else
2884 tpr = NULL;
2885 if (error)
2886 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
2887 error);
2888 else
2889 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2890 tprintf_close(tpr);
2891 return (0);
2892 }
2893
2894 static void
2895 nfs_down(rep, msg, error)
2896 struct nfsreq *rep;
2897 const char *msg;
2898 int error;
2899 {
2900 int dosignal;
2901
2902 if (rep == NULL || rep->r_nmp == NULL)
2903 return;
2904 if (!(rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
2905 vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
2906 VQ_NOTRESP, 0);
2907 rep->r_nmp->nm_state |= NFSSTA_TIMEO;
2908 }
2909 rep->r_flags |= R_TPRINTFMSG;
2910 nfs_msg(rep->r_procp, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
2911 msg, error);
2912 }
2913
2914 static void
2915 nfs_up(rep, msg, error)
2916 struct nfsreq *rep;
2917 const char *msg;
2918 int error;
2919 {
2920
2921 if (error != 0 || rep == NULL || rep->r_nmp == NULL)
2922 return;
2923 if ((rep->r_flags & R_TPRINTFMSG) != 0)
2924 nfs_msg(rep->r_procp,
2925 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
2926 if ((rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
2927 rep->r_nmp->nm_state &= ~NFSSTA_TIMEO;
2928 vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
2929 VQ_NOTRESP, 1);
2930 }
2931 }