/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */
/*
 * Socket operations for use by nfs
 */
66 #include <sys/param.h>
67 #include <sys/systm.h>
69 #include <sys/mount.h>
70 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/vnode.h>
74 #include <sys/domain.h>
75 #include <sys/protosw.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/syslog.h>
79 #include <sys/tprintf.h>
80 #include <machine/spl.h>
83 #include <kern/clock.h>
86 #include <netinet/in.h>
87 #include <netinet/tcp.h>
89 #include <nfs/rpcv2.h>
90 #include <nfs/nfsproto.h>
92 #include <nfs/xdr_subs.h>
93 #include <nfs/nfsm_subs.h>
94 #include <nfs/nfsmount.h>
95 #include <nfs/nfsnode.h>
96 #include <nfs/nfsrtt.h>
97 #include <nfs/nqnfs.h>
99 #include <sys/kdebug.h>
101 #define FSDBG(A, B, C, D, E) \
102 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
103 (int)(B), (int)(C), (int)(D), (int)(E), 0)
104 #define FSDBG_TOP(A, B, C, D, E) \
105 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
106 (int)(B), (int)(C), (int)(D), (int)(E), 0)
107 #define FSDBG_BOT(A, B, C, D, E) \
108 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
109 (int)(B), (int)(C), (int)(D), (int)(E), 0)
/*
 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 */
126 #define NFS_RTO(n, t) \
127 ((t) == 0 ? (n)->nm_timeo : \
129 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
130 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
131 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
132 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
/*
 * External data, mostly RPC constants in XDR form
 */
136 extern u_long rpc_reply
, rpc_msgdenied
, rpc_mismatch
, rpc_vers
, rpc_auth_unix
,
137 rpc_msgaccepted
, rpc_call
, rpc_autherr
,
139 extern u_long nfs_prog
, nqnfs_prog
;
140 extern time_t nqnfsstarttime
;
141 extern struct nfsstats nfsstats
;
142 extern int nfsv3_procid
[NFS_NPROCS
];
143 extern int nfs_ticks
;
144 extern u_long nfs_xidwrap
;
/*
 * Defines which timer to use for the procnum.
 */
154 static int proct
[NFS_NPROCS
] = {
155 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
172 #define NFS_CWNDSCALE 256
173 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
174 static int nfs_backoff
[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
176 struct nfsrtt nfsrtt
;
178 static int nfs_msg
__P((struct proc
*,char *,char *));
179 static int nfs_rcvlock
__P((struct nfsreq
*));
180 static void nfs_rcvunlock
__P((int *flagp
));
181 static int nfs_receive
__P((struct nfsreq
*rep
, struct mbuf
**aname
,
183 static int nfs_reconnect
__P((struct nfsreq
*rep
));
185 static int nfsrv_getstream
__P((struct nfssvc_sock
*,int));
187 int (*nfsrv3_procs
[NFS_NPROCS
]) __P((struct nfsrv_descript
*nd
,
188 struct nfssvc_sock
*slp
,
190 struct mbuf
**mreqp
)) = {
218 #endif /* NFS_NOSERVER */
221 * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
222 * But some of this code may prove useful someday...
226 int nfstraceindx
= 0;
227 struct nfstracerec nfstracebuf
[NFSTBUFSIZ
] = {{0,0,0,0}};
229 #define NFSTRACESUSPENDERS
230 #ifdef NFSTRACESUSPENDERS
231 uint nfstracemask
= 0xfff00200;
232 int nfstracexid
= -1;
233 uint onfstracemask
= 0;
234 int nfstracesuspend
= -1;
235 #define NFSTRACE_SUSPEND \
237 if (nfstracemask) { \
238 onfstracemask = nfstracemask; \
242 #define NFSTRACE_RESUME \
244 nfstracesuspend = -1; \
246 nfstracemask = onfstracemask; \
248 #define NFSTRACE_STARTSUSPENDCOUNTDOWN \
250 nfstracesuspend = (nfstraceindx+100) % NFSTBUFSIZ; \
252 #define NFSTRACE_SUSPENDING (nfstracesuspend != -1)
253 #define NFSTRACE_SUSPENSEOVER \
254 (nfstracesuspend > 100 ? \
255 (nfstraceindx >= nfstracesuspend || \
256 nfstraceindx < nfstracesuspend - 100) : \
257 (nfstraceindx >= nfstracesuspend && \
258 nfstraceindx < nfstracesuspend + 8192 - 100))
260 uint nfstracemask
= 0;
261 #endif /* NFSTRACESUSPENDERS */
266 int nfsoprocnum
, nfsolen
;
267 int nfsbt
[32], nfsbtlen
;
271 backtrace(int *where
, int size
)
273 int register sp
, *fp
, numsaved
;
275 __asm__
volatile("mr %0,r1" : "=r" (sp
));
277 fp
= (int *)*((int *)sp
);
279 for (numsaved
= 0; numsaved
< size
; numsaved
++) {
287 #elif defined(__i386__)
291 return (0); /* Till someone implements a real routine */
294 #error architecture not implemented.
298 nfsdup(struct nfsreq
*rep
)
300 int *ip
, i
, first
= 1, end
;
304 if ((nfs_debug
& NFS_DEBUG_DUP
) == 0)
306 /* last mbuf in chain will be nfs content */
307 for (mb
= rep
->r_mreq
; mb
->m_next
; mb
= mb
->m_next
)
309 if (rep
->r_procnum
== nfsoprocnum
&& mb
->m_len
== nfsolen
&&
310 !bcmp((caddr_t
)nfsodata
, mb
->m_data
, nfsolen
)) {
311 s
= b
+ sprintf(b
, "nfsdup x=%x p=%d h=", rep
->r_xid
,
313 end
= (int)(VTONFS(rep
->r_vp
)->n_fhp
);
314 ip
= (int *)(end
& ~3);
315 end
+= VTONFS(rep
->r_vp
)->n_fhsize
;
316 while ((int)ip
< end
) {
318 if (first
) { /* avoid leading zeroes */
322 s
+= sprintf(s
, "%x", i
);
324 s
+= sprintf(s
, "%08x", i
);
328 else /* eliminate trailing zeroes */
332 * set a breakpoint here and you can view the
333 * current backtrace and the one saved in nfsbt
337 nfsoprocnum
= rep
->r_procnum
;
339 bcopy(mb
->m_data
, (caddr_t
)nfsodata
, mb
->m_len
);
340 nfsbtlen
= backtrace(&nfsbt
, sizeof(nfsbt
));
345 * Initialize sockets and congestion for a new NFS connection.
346 * We do not free the sockaddr if error.
349 nfs_connect(nmp
, rep
)
350 register struct nfsmount
*nmp
;
353 register struct socket
*so
;
354 int s
, error
, rcvreserve
, sndreserve
;
355 struct sockaddr
*saddr
;
356 struct sockaddr_in sin
;
359 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
360 nmp
->nm_so
= (struct socket
*)0;
361 saddr
= mtod(nmp
->nm_nam
, struct sockaddr
*);
362 error
= socreate(saddr
->sa_family
, &nmp
->nm_so
, nmp
->nm_sotype
,
368 nmp
->nm_soflags
= so
->so_proto
->pr_flags
;
371 * Some servers require that the client port be a reserved port number.
373 if (saddr
->sa_family
== AF_INET
&& (nmp
->nm_flag
& NFSMNT_RESVPORT
)) {
374 sin
.sin_len
= sizeof (struct sockaddr_in
);
375 sin
.sin_family
= AF_INET
;
376 sin
.sin_addr
.s_addr
= INADDR_ANY
;
377 tport
= IPPORT_RESERVED
- 1;
378 sin
.sin_port
= htons(tport
);
380 while ((error
= sobind(so
, (struct sockaddr
*) &sin
) == EADDRINUSE
) &&
381 (--tport
> IPPORT_RESERVED
/ 2))
382 sin
.sin_port
= htons(tport
);
389 * Protocols that do not require connections may be optionally left
390 * unconnected for servers that reply from a port other than NFS_PORT.
392 if (nmp
->nm_flag
& NFSMNT_NOCONN
) {
393 if (nmp
->nm_soflags
& PR_CONNREQUIRED
) {
398 error
= soconnect(so
, mtod(nmp
->nm_nam
, struct sockaddr
*));
404 * Wait for the connection to complete. Cribbed from the
405 * connect system call but with the wait timing out so
406 * that interruptible mounts don't hang here for a long time.
409 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
410 (void) tsleep((caddr_t
)&so
->so_timeo
, PSOCK
,
412 if ((so
->so_state
& SS_ISCONNECTING
) &&
413 so
->so_error
== 0 && rep
&&
414 (error
= nfs_sigintr(nmp
, rep
, rep
->r_procp
))) {
415 so
->so_state
&= ~SS_ISCONNECTING
;
421 error
= so
->so_error
;
428 if (nmp
->nm_flag
& (NFSMNT_SOFT
| NFSMNT_INT
)) {
429 so
->so_rcv
.sb_timeo
= (5 * hz
);
430 so
->so_snd
.sb_timeo
= (5 * hz
);
432 so
->so_rcv
.sb_timeo
= 0;
433 so
->so_snd
.sb_timeo
= 0;
435 if (nmp
->nm_sotype
== SOCK_DGRAM
) {
436 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
) * 2;
437 rcvreserve
= (nmp
->nm_rsize
+ NFS_MAXPKTHDR
) * 2;
438 } else if (nmp
->nm_sotype
== SOCK_SEQPACKET
) {
439 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
) * 2;
440 rcvreserve
= (nmp
->nm_rsize
+ NFS_MAXPKTHDR
) * 2;
442 if (nmp
->nm_sotype
!= SOCK_STREAM
)
443 panic("nfscon sotype");
445 if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
449 bzero(&sopt
, sizeof sopt
);
450 sopt
.sopt_level
= SOL_SOCKET
;
451 sopt
.sopt_name
= SO_KEEPALIVE
;
452 sopt
.sopt_val
= &val
;
453 sopt
.sopt_valsize
= sizeof val
;
457 if (so
->so_proto
->pr_protocol
== IPPROTO_TCP
) {
461 bzero(&sopt
, sizeof sopt
);
462 sopt
.sopt_level
= IPPROTO_TCP
;
463 sopt
.sopt_name
= TCP_NODELAY
;
464 sopt
.sopt_val
= &val
;
465 sopt
.sopt_valsize
= sizeof val
;
470 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
+ sizeof (u_long
))
472 rcvreserve
= (nmp
->nm_rsize
+ NFS_MAXPKTHDR
+ sizeof (u_long
))
476 error
= soreserve(so
, sndreserve
, rcvreserve
);
480 so
->so_rcv
.sb_flags
|= SB_NOINTR
;
481 so
->so_snd
.sb_flags
|= SB_NOINTR
;
483 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
485 /* Initialize other non-zero congestion variables */
486 nmp
->nm_srtt
[0] = nmp
->nm_srtt
[1] = nmp
->nm_srtt
[2] =
487 nmp
->nm_srtt
[3] = (NFS_TIMEO
<< 3);
488 nmp
->nm_sdrtt
[0] = nmp
->nm_sdrtt
[1] = nmp
->nm_sdrtt
[2] =
489 nmp
->nm_sdrtt
[3] = 0;
490 nmp
->nm_cwnd
= NFS_MAXCWND
/ 2; /* Initial send window */
492 FSDBG(529, nmp
, nmp
->nm_flag
, nmp
->nm_soflags
, nmp
->nm_cwnd
);
493 nmp
->nm_timeouts
= 0;
497 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
504 * Called when a connection is broken on a reliable protocol.
505 * - clean up the old socket
506 * - nfs_connect() again
507 * - set R_MUSTRESEND for all outstanding requests on mount point
508 * If this fails the mount point is DEAD!
509 * nb: Must be called with the nfs_sndlock() set on the mount point.
513 register struct nfsreq
*rep
;
515 register struct nfsreq
*rp
;
516 register struct nfsmount
*nmp
= rep
->r_nmp
;
520 while ((error
= nfs_connect(nmp
, rep
))) {
521 if (error
== EINTR
|| error
== ERESTART
)
523 (void) tsleep((caddr_t
)&lbolt
, PSOCK
, "nfscon", 0);
526 NFS_DPF(DUP
, ("nfs_reconnect RESEND\n"));
528 * Loop through outstanding request list and fix up all requests
531 for (rp
= nfs_reqq
.tqh_first
; rp
!= 0; rp
= rp
->r_chain
.tqe_next
) {
532 if (rp
->r_nmp
== nmp
)
533 rp
->r_flags
|= R_MUSTRESEND
;
539 * NFS disconnect. Clean up and unlink.
543 register struct nfsmount
*nmp
;
545 register struct socket
*so
;
547 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
550 nmp
->nm_so
= (struct socket
*)0;
554 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
558 * This is the nfs send routine. For connection based socket types, it
559 * must be called with an nfs_sndlock() on the socket.
560 * "rep == NULL" indicates that it has been called from a server.
561 * For the client side:
562 * - return EINTR if the RPC is terminated, 0 otherwise
563 * - set R_MUSTRESEND if the send fails for any reason
564 * - do any cleanup required by recoverable socket errors (???)
565 * For the server side:
566 * - return EINTR or ERESTART if interrupted by a signal
567 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
568 * - do any cleanup required by recoverable socket errors (???)
571 nfs_send(so
, nam
, top
, rep
)
572 register struct socket
*so
;
574 register struct mbuf
*top
;
577 struct sockaddr
*sendnam
;
578 int error
, soflags
, flags
;
581 char savenametolog
[MNAMELEN
];
584 if (rep
->r_flags
& R_SOFTTERM
) {
588 if ((so
= rep
->r_nmp
->nm_so
) == NULL
) {
589 rep
->r_flags
|= R_MUSTRESEND
;
593 rep
->r_flags
&= ~R_MUSTRESEND
;
594 soflags
= rep
->r_nmp
->nm_soflags
;
595 for (rp
= nfs_reqq
.tqh_first
; rp
; rp
= rp
->r_chain
.tqe_next
)
599 xidqueued
= rp
->r_xid
;
601 soflags
= so
->so_proto
->pr_flags
;
602 if ((soflags
& PR_CONNREQUIRED
) || (so
->so_state
& SS_ISCONNECTED
) ||
604 sendnam
= (struct sockaddr
*)0;
606 sendnam
= mtod(nam
, struct sockaddr
*);
608 if (so
->so_type
== SOCK_SEQPACKET
)
618 * Save the name here in case mount point goes away when we switch
619 * funnels. The name is using local stack and is large, but don't
620 * want to block if we malloc.
623 strncpy(savenametolog
,
624 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
626 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
627 error
= sosend(so
, sendnam
, (struct uio
*)0, top
,
628 (struct mbuf
*)0, flags
);
629 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
634 for (rp
= nfs_reqq
.tqh_first
; rp
;
635 rp
= rp
->r_chain
.tqe_next
)
636 if (rp
== rep
&& rp
->r_xid
== xidqueued
)
639 panic("nfs_send: error %d xid %x gone",
642 log(LOG_INFO
, "nfs send error %d for server %s\n",
643 error
, savenametolog
);
645 * Deal with errors for the client side.
647 if (rep
->r_flags
& R_SOFTTERM
)
650 rep
->r_flags
|= R_MUSTRESEND
;
652 ("nfs_send RESEND error=%d\n", error
));
655 log(LOG_INFO
, "nfsd send error %d\n", error
);
658 * Handle any recoverable (soft) socket errors here. (???)
660 if (error
!= EINTR
&& error
!= ERESTART
&&
661 error
!= EWOULDBLOCK
&& error
!= EPIPE
)
668 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
669 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
670 * Mark and consolidate the data into a new mbuf list.
671 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
673 * For SOCK_STREAM we must be very careful to read an entire record once
674 * we have read any of it, even if the system call has been interrupted.
677 nfs_receive(rep
, aname
, mp
)
678 register struct nfsreq
*rep
;
682 register struct socket
*so
;
685 register struct mbuf
*m
;
686 struct mbuf
*control
;
688 struct sockaddr
**getnam
;
689 struct sockaddr
*tmp_nam
;
691 struct sockaddr_in
*sin
;
692 int error
, sotype
, rcvflg
;
693 struct proc
*p
= current_proc(); /* XXX */
696 * Set up arguments for soreceive()
698 *mp
= (struct mbuf
*)0;
699 *aname
= (struct mbuf
*)0;
700 sotype
= rep
->r_nmp
->nm_sotype
;
703 * For reliable protocols, lock against other senders/receivers
704 * in case a reconnect is necessary.
705 * For SOCK_STREAM, first get the Record Mark to find out how much
706 * more there is to get.
707 * We must lock the socket against other receivers
708 * until we have an entire rpc request/reply.
710 if (sotype
!= SOCK_DGRAM
) {
711 error
= nfs_sndlock(&rep
->r_nmp
->nm_flag
, rep
);
716 * Check for fatal errors and resending request.
719 * Ugh: If a reconnect attempt just happened, nm_so
720 * would have changed. NULL indicates a failed
721 * attempt that has essentially shut down this
724 if (rep
->r_mrep
|| (rep
->r_flags
& R_SOFTTERM
)) {
725 nfs_sndunlock(&rep
->r_nmp
->nm_flag
);
728 so
= rep
->r_nmp
->nm_so
;
730 error
= nfs_reconnect(rep
);
732 nfs_sndunlock(&rep
->r_nmp
->nm_flag
);
737 while (rep
->r_flags
& R_MUSTRESEND
) {
738 m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, M_WAIT
);
739 nfsstats
.rpcretries
++;
741 ("nfs_receive RESEND %s\n",
742 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
));
743 error
= nfs_send(so
, rep
->r_nmp
->nm_nam
, m
, rep
);
745 * we also hold rcv lock so rep is still
749 if (error
== EINTR
|| error
== ERESTART
||
750 (error
= nfs_reconnect(rep
))) {
751 nfs_sndunlock(&rep
->r_nmp
->nm_flag
);
757 nfs_sndunlock(&rep
->r_nmp
->nm_flag
);
758 if (sotype
== SOCK_STREAM
) {
759 aio
.iov_base
= (caddr_t
) &len
;
760 aio
.iov_len
= sizeof(u_long
);
763 auio
.uio_segflg
= UIO_SYSSPACE
;
764 auio
.uio_rw
= UIO_READ
;
766 auio
.uio_resid
= sizeof(u_long
);
769 rcvflg
= MSG_WAITALL
;
770 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
771 error
= soreceive(so
, (struct sockaddr
**)0, &auio
,
772 (struct mbuf
**)0, (struct mbuf
**)0, &rcvflg
);
773 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
774 if (!rep
->r_nmp
) /* if unmounted then bailout */
776 if (error
== EWOULDBLOCK
&& rep
) {
777 if (rep
->r_flags
& R_SOFTTERM
)
780 } while (error
== EWOULDBLOCK
);
781 if (!error
&& auio
.uio_resid
> 0) {
783 "short receive (%d/%d) from nfs server %s\n",
784 sizeof(u_long
) - auio
.uio_resid
,
786 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
791 len
= ntohl(len
) & ~0x80000000;
793 * This is SERIOUS! We are out of sync with the sender
794 * and forcing a disconnect/reconnect is all I can do.
796 if (len
> NFS_MAXPACKET
) {
797 log(LOG_ERR
, "%s (%d) from nfs server %s\n",
798 "impossible packet length",
800 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
804 auio
.uio_resid
= len
;
806 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
808 rcvflg
= MSG_WAITALL
;
809 error
= soreceive(so
, (struct sockaddr
**)0,
810 &auio
, mp
, (struct mbuf
**)0, &rcvflg
);
811 if (!rep
->r_nmp
) /* if unmounted then bailout */ {
812 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
815 } while (error
== EWOULDBLOCK
|| error
== EINTR
||
818 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
820 if (!error
&& auio
.uio_resid
> 0) {
822 "short receive (%d/%d) from nfs server %s\n",
823 len
- auio
.uio_resid
, len
,
824 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
829 * NB: Since uio_resid is big, MSG_WAITALL is ignored
830 * and soreceive() will return when it has either a
831 * control msg or a data msg.
832 * We have no use for control msg., but must grab them
833 * and then throw them away so we know what is going
836 auio
.uio_resid
= len
= 100000000; /* Anything Big */
839 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
842 error
= soreceive(so
, (struct sockaddr
**)0,
843 &auio
, mp
, &control
, &rcvflg
);
844 if (!rep
->r_nmp
) /* if unmounted then bailout */ {
845 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
850 if (error
== EWOULDBLOCK
&& rep
) {
851 if (rep
->r_flags
& R_SOFTTERM
) {
852 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
856 } while (error
== EWOULDBLOCK
||
857 (!error
&& *mp
== NULL
&& control
));
859 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
861 if ((rcvflg
& MSG_EOR
) == 0)
863 if (!error
&& *mp
== NULL
)
865 len
-= auio
.uio_resid
;
868 if (error
&& error
!= EINTR
&& error
!= ERESTART
) {
870 *mp
= (struct mbuf
*)0;
873 "receive error %d from nfs server %s\n",
875 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
876 error
= nfs_sndlock(&rep
->r_nmp
->nm_flag
, rep
);
878 error
= nfs_reconnect(rep
);
883 if ((so
= rep
->r_nmp
->nm_so
) == NULL
)
885 if (so
->so_state
& SS_ISCONNECTED
)
886 getnam
= (struct sockaddr
**)0;
889 auio
.uio_resid
= len
= 1000000;
892 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
895 error
= soreceive(so
, getnam
, &auio
, mp
,
896 (struct mbuf
**)0, &rcvflg
);
898 if ((getnam
) && (*getnam
)) {
899 MGET(mhck
, M_WAIT
, MT_SONAME
);
900 mhck
->m_len
= (*getnam
)->sa_len
;
901 sin
= mtod(mhck
, struct sockaddr_in
*);
902 bcopy(*getnam
, sin
, sizeof(struct sockaddr_in
));
903 mhck
->m_hdr
.mh_len
= sizeof(struct sockaddr_in
);
904 FREE(*getnam
, M_SONAME
);
907 if (!rep
->r_nmp
) /* if unmounted then bailout */ {
908 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
912 if (error
== EWOULDBLOCK
&&
913 (rep
->r_flags
& R_SOFTTERM
)) {
914 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
917 } while (error
== EWOULDBLOCK
);
919 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
920 len
-= auio
.uio_resid
;
925 *mp
= (struct mbuf
*)0;
931 * Implement receipt of reply on a socket.
932 * We must search through the list of received datagrams matching them
933 * with outstanding requests using the xid, until ours is found.
938 struct nfsreq
*myrep
;
940 register struct nfsreq
*rep
;
941 register struct nfsmount
*nmp
= myrep
->r_nmp
;
943 struct mbuf
*mrep
, *md
;
950 * Loop around until we get our own reply
954 * Lock against other receivers so that I don't get stuck in
955 * sbwait() after someone else has received my reply for me.
956 * Also necessary for connection based protocols to avoid
957 * race conditions during a reconnect.
958 * If nfs_rcvlock() returns EALREADY, that means that
959 * the reply has already been recieved by another
960 * process and we can return immediately. In this
961 * case, the lock is not taken to avoid races with
964 error
= nfs_rcvlock(myrep
);
965 if (error
== EALREADY
)
971 * If we slept after putting bits otw, then reply may have
972 * arrived. In which case returning is required, or we
973 * would hang trying to nfs_receive an already received reply.
975 if (myrep
->r_mrep
!= NULL
) {
976 nfs_rcvunlock(&nmp
->nm_flag
);
977 FSDBG(530, myrep
->r_xid
, myrep
, myrep
->r_nmp
, -1);
981 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
982 * is still intact by checks done in nfs_rcvlock.
984 error
= nfs_receive(myrep
, &nam
, &mrep
);
986 * Bailout asap if nfsmount struct gone (unmounted).
988 if (!myrep
->r_nmp
|| !nmp
->nm_so
) {
989 FSDBG(530, myrep
->r_xid
, myrep
, nmp
, -2);
990 return (ECONNABORTED
);
993 FSDBG(530, myrep
->r_xid
, myrep
, nmp
, error
);
994 nfs_rcvunlock(&nmp
->nm_flag
);
996 /* Bailout asap if nfsmount struct gone (unmounted). */
997 if (!myrep
->r_nmp
|| !nmp
->nm_so
)
998 return (ECONNABORTED
);
1001 * Ignore routing errors on connectionless protocols??
1003 if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
)) {
1004 nmp
->nm_so
->so_error
= 0;
1005 if (myrep
->r_flags
& R_GETONEREP
)
1015 * We assume all is fine, but if we did not have an error
1016 * and mrep is 0, better not dereference it. nfs_receieve
1017 * calls soreceive which carefully sets error=0 when it got
1018 * errors on sbwait (tsleep). In most cases, I assume that's
1019 * so we could go back again. In tcp case, EPIPE is returned.
1020 * In udp, case nfs_receive gets back here with no error and no
1021 * mrep. Is the right fix to have soreceive check for process
1022 * aborted after sbwait and return something non-zero? Should
1023 * nfs_receive give an EPIPE? Too risky to play with those
1024 * two this late in game for a shutdown problem. Instead,
1025 * just check here and get out. (ekn)
1028 FSDBG(530, myrep
->r_xid
, myrep
, nmp
, -3);
1029 return (ECONNABORTED
); /* sounds good */
1033 * Get the xid and check that it is an rpc reply
1036 dpos
= mtod(md
, caddr_t
);
1037 nfsm_dissect(tl
, u_long
*, 2*NFSX_UNSIGNED
);
1039 if (*tl
!= rpc_reply
) {
1040 #ifndef NFS_NOSERVER
1041 if (nmp
->nm_flag
& NFSMNT_NQNFS
) {
1042 if (nqnfs_callback(nmp
, mrep
, md
, dpos
))
1043 nfsstats
.rpcinvalid
++;
1045 nfsstats
.rpcinvalid
++;
1049 nfsstats
.rpcinvalid
++;
1053 if (nmp
->nm_flag
& NFSMNT_RCVLOCK
)
1054 nfs_rcvunlock(&nmp
->nm_flag
);
1055 if (myrep
->r_flags
& R_GETONEREP
)
1056 return (0); /* this path used by NQNFS */
1061 * Loop through the request list to match up the reply
1062 * Iff no match, just drop the datagram
1064 for (rep
= nfs_reqq
.tqh_first
; rep
!= 0;
1065 rep
= rep
->r_chain
.tqe_next
) {
1066 if (rep
->r_mrep
== NULL
&& rxid
== rep
->r_xid
) {
1074 rt
= &nfsrtt
.rttl
[nfsrtt
.pos
];
1075 rt
->proc
= rep
->r_procnum
;
1076 rt
->rto
= NFS_RTO(nmp
, proct
[rep
->r_procnum
]);
1077 rt
->sent
= nmp
->nm_sent
;
1078 rt
->cwnd
= nmp
->nm_cwnd
;
1079 if (proct
[rep
->r_procnum
] == 0)
1080 panic("nfs_reply: proct[%d] is zero", rep
->r_procnum
);
1081 rt
->srtt
= nmp
->nm_srtt
[proct
[rep
->r_procnum
] - 1];
1082 rt
->sdrtt
= nmp
->nm_sdrtt
[proct
[rep
->r_procnum
] - 1];
1083 rt
->fsid
= nmp
->nm_mountp
->mnt_stat
.f_fsid
;
1085 if (rep
->r_flags
& R_TIMING
)
1086 rt
->rtt
= rep
->r_rtt
;
1089 nfsrtt
.pos
= (nfsrtt
.pos
+ 1) % NFSRTTLOGSIZ
;
1092 * Update congestion window.
1093 * Do the additive increase of
1096 FSDBG(530, rep
->r_xid
, rep
, nmp
->nm_sent
,
1098 if (nmp
->nm_cwnd
<= nmp
->nm_sent
) {
1100 (NFS_CWNDSCALE
* NFS_CWNDSCALE
+
1101 (nmp
->nm_cwnd
>> 1)) / nmp
->nm_cwnd
;
1102 if (nmp
->nm_cwnd
> NFS_MAXCWND
)
1103 nmp
->nm_cwnd
= NFS_MAXCWND
;
1105 if (!(rep
->r_flags
& R_SENT
))
1106 printf("nfs_reply: unsent xid=%x",
1108 rep
->r_flags
&= ~R_SENT
;
1109 nmp
->nm_sent
-= NFS_CWNDSCALE
;
1111 * Update rtt using a gain of 0.125 on the mean
1112 * and a gain of 0.25 on the deviation.
1114 if (rep
->r_flags
& R_TIMING
) {
1116 * Since the timer resolution of
1117 * NFS_HZ is so course, it can often
1118 * result in r_rtt == 0. Since
1119 * r_rtt == N means that the actual
1120 * rtt is between N+dt and N+2-dt ticks,
1123 if (proct
[rep
->r_procnum
] == 0)
1124 panic("nfs_reply: proct[%d] is zero", rep
->r_procnum
);
1125 t1
= rep
->r_rtt
+ 1;
1126 t1
-= (NFS_SRTT(rep
) >> 3);
1127 NFS_SRTT(rep
) += t1
;
1130 t1
-= (NFS_SDRTT(rep
) >> 2);
1131 NFS_SDRTT(rep
) += t1
;
1133 nmp
->nm_timeouts
= 0;
1137 nfs_rcvunlock(&nmp
->nm_flag
);
1139 * If not matched to a request, drop it.
1140 * If it's mine, get out.
1143 nfsstats
.rpcunexpected
++;
1145 } else if (rep
== myrep
) {
1146 if (rep
->r_mrep
== NULL
)
1147 panic("nfs_reply: nil r_mrep");
1150 FSDBG(530, myrep
->r_xid
, myrep
, rep
,
1151 rep
? rep
->r_xid
: myrep
->r_flags
);
1152 if (myrep
->r_flags
& R_GETONEREP
)
1153 return (0); /* this path used by NQNFS */
1158 * nfs_request - goes something like this
1159 * - fill in request struct
1160 * - links it into list
1161 * - calls nfs_send() for first transmit
1162 * - calls nfs_receive() to get reply
1163 * - break down rpc header and return with nfs reply pointed to
1165 * nb: always frees up mreq mbuf list
1168 nfs_request(vp
, mrest
, procnum
, procp
, cred
, mrp
, mdp
, dposp
, xidp
)
1179 register struct mbuf
*m
, *mrep
;
1180 register struct nfsreq
*rep
, *rp
;
1181 register u_long
*tl
;
1183 struct nfsmount
*nmp
;
1184 struct mbuf
*md
, *mheadend
;
1186 char nickv
[RPCX_NICKVERF
];
1187 time_t reqtime
, waituntil
;
1189 int t1
, nqlflag
, cachable
, s
, error
= 0, mrest_len
, auth_len
, auth_type
;
1190 int trylater_delay
= NQ_TRYLATERDEL
, trylater_cnt
= 0, failed_auth
= 0;
1191 int verf_len
, verf_type
;
1194 char *auth_str
, *verf_str
;
1195 NFSKERBKEY_T key
; /* save session key */
1199 nmp
= VFSTONFS(vp
->v_mount
);
1200 MALLOC_ZONE(rep
, struct nfsreq
*,
1201 sizeof(struct nfsreq
), M_NFSREQ
, M_WAITOK
);
1202 FSDBG_TOP(531, vp
, procnum
, nmp
, rep
);
1205 * make sure if we blocked above, that the file system didn't get
1206 * unmounted leaving nmp bogus value to trip on later and crash.
1207 * Note nfs_unmount will set rep->r_nmp if unmounted volume, but we
1208 * aren't that far yet. SO this is best we can do. I wanted to check
1209 * for vp->v_mount = 0 also below, but that caused reboot crash.
1210 * Something must think it's okay for vp-v_mount=0 during booting.
1211 * Thus the best I can do here is see if we still have a vnode.
1214 if (vp
->v_type
== VBAD
) {
1215 FSDBG_BOT(531, 1, vp
, nmp
, rep
);
1216 _FREE_ZONE((caddr_t
)rep
, sizeof (struct nfsreq
), M_NFSREQ
);
1221 rep
->r_procp
= procp
;
1222 rep
->r_procnum
= procnum
;
1232 * Get the RPC header with authorization.
1235 verf_str
= auth_str
= (char *)0;
1236 if (nmp
->nm_flag
& NFSMNT_KERB
) {
1238 verf_len
= sizeof (nickv
);
1239 auth_type
= RPCAUTH_KERB4
;
1240 bzero((caddr_t
)key
, sizeof (key
));
1241 if (failed_auth
|| nfs_getnickauth(nmp
, cred
, &auth_str
,
1242 &auth_len
, verf_str
, verf_len
)) {
1243 error
= nfs_getauth(nmp
, rep
, cred
, &auth_str
,
1244 &auth_len
, verf_str
, &verf_len
, key
);
1246 FSDBG_BOT(531, 2, vp
, error
, rep
);
1247 _FREE_ZONE((caddr_t
)rep
,
1248 sizeof (struct nfsreq
), M_NFSREQ
);
1254 auth_type
= RPCAUTH_UNIX
;
1255 if (cred
->cr_ngroups
< 1)
1256 panic("nfsreq nogrps");
1257 auth_len
= ((((cred
->cr_ngroups
- 1) > nmp
->nm_numgrps
) ?
1258 nmp
->nm_numgrps
: (cred
->cr_ngroups
- 1)) << 2) +
1261 m
= nfsm_rpchead(cred
, nmp
->nm_flag
, procnum
, auth_type
, auth_len
,
1262 auth_str
, verf_len
, verf_str
, mrest
, mrest_len
, &mheadend
, &xid
);
1264 *xidp
= ntohl(xid
) + ((u_int64_t
)nfs_xidwrap
<< 32);
1266 _FREE(auth_str
, M_TEMP
);
1269 * For stream protocols, insert a Sun RPC Record Mark.
1271 if (nmp
->nm_sotype
== SOCK_STREAM
) {
1272 M_PREPEND(m
, NFSX_UNSIGNED
, M_WAIT
);
1273 *mtod(m
, u_long
*) = htonl(0x80000000 |
1274 (m
->m_pkthdr
.len
- NFSX_UNSIGNED
));
1279 if (nmp
->nm_flag
& NFSMNT_SOFT
)
1280 rep
->r_retry
= nmp
->nm_retry
;
1282 rep
->r_retry
= NFS_MAXREXMIT
+ 1; /* past clip limit */
1283 rep
->r_rtt
= rep
->r_rexmit
= 0;
1284 if (proct
[procnum
] > 0)
1285 rep
->r_flags
= R_TIMING
;
1291 * Do the client side RPC.
1293 nfsstats
.rpcrequests
++;
1295 * Chain request into list of outstanding requests. Be sure
1296 * to put it LAST so timer finds oldest requests first.
1299 TAILQ_INSERT_TAIL(&nfs_reqq
, rep
, r_chain
);
1301 /* Get send time for nqnfs */
1302 reqtime
= time
.tv_sec
;
1305 * If backing off another request or avoiding congestion, don't
1306 * send this one now but let timer do it. If not timing a request,
1309 if (nmp
->nm_so
&& (nmp
->nm_sotype
!= SOCK_DGRAM
||
1310 (nmp
->nm_flag
& NFSMNT_DUMBTIMR
) ||
1311 nmp
->nm_sent
< nmp
->nm_cwnd
)) {
1313 if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
1314 error
= nfs_sndlock(&nmp
->nm_flag
, rep
);
1317 * Set the R_SENT before doing the send in case another thread
1318 * processes the reply before the nfs_send returns here
1321 if ((rep
->r_flags
& R_MUSTRESEND
) == 0) {
1322 FSDBG(531, rep
->r_xid
, rep
, nmp
->nm_sent
,
1324 nmp
->nm_sent
+= NFS_CWNDSCALE
;
1325 rep
->r_flags
|= R_SENT
;
1328 m
= m_copym(m
, 0, M_COPYALL
, M_WAIT
);
1329 error
= nfs_send(nmp
->nm_so
, nmp
->nm_nam
, m
, rep
);
1330 if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
1331 nfs_sndunlock(&nmp
->nm_flag
);
1334 nmp
->nm_sent
-= NFS_CWNDSCALE
;
1335 rep
->r_flags
&= ~R_SENT
;
1343 * Wait for the reply from our send or the timer's.
1345 if (!error
|| error
== EPIPE
)
1346 error
= nfs_reply(rep
);
1349 * RPC done, unlink the request.
1352 for (rp
= nfs_reqq
.tqh_first
; rp
;
1353 rp
= rp
->r_chain
.tqe_next
)
1354 if (rp
== rep
&& rp
->r_xid
== xid
)
1357 panic("nfs_request race, rep %x xid %x", rep
, xid
);
1358 TAILQ_REMOVE(&nfs_reqq
, rep
, r_chain
);
1362 * Decrement the outstanding request count.
1364 if (rep
->r_flags
& R_SENT
) {
1365 FSDBG(531, rep
->r_xid
, rep
, nmp
->nm_sent
, nmp
->nm_cwnd
);
1366 rep
->r_flags
&= ~R_SENT
; /* paranoia */
1367 nmp
->nm_sent
-= NFS_CWNDSCALE
;
1371 * If there was a successful reply and a tprintf msg.
1372 * tprintf a response.
1374 if (!error
&& (rep
->r_flags
& R_TPRINTFMSG
))
1375 nfs_msg(rep
->r_procp
, nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
1381 m_freem(rep
->r_mreq
);
1382 FSDBG_BOT(531, error
, rep
->r_xid
, nmp
, rep
);
1383 _FREE_ZONE((caddr_t
)rep
, sizeof (struct nfsreq
), M_NFSREQ
);
1388 * break down the rpc header and check if ok
1390 nfsm_dissect(tl
, u_long
*, 3 * NFSX_UNSIGNED
);
1391 if (*tl
++ == rpc_msgdenied
) {
1392 if (*tl
== rpc_mismatch
)
1394 else if ((nmp
->nm_flag
& NFSMNT_KERB
) && *tl
++ == rpc_autherr
) {
1397 mheadend
->m_next
= (struct mbuf
*)0;
1399 m_freem(rep
->r_mreq
);
1406 m_freem(rep
->r_mreq
);
1407 FSDBG_BOT(531, error
, rep
->r_xid
, nmp
, rep
);
1408 _FREE_ZONE((caddr_t
)rep
, sizeof (struct nfsreq
), M_NFSREQ
);
1413 * Grab any Kerberos verifier, otherwise just throw it away.
1415 verf_type
= fxdr_unsigned(int, *tl
++);
1416 i
= fxdr_unsigned(int, *tl
);
1417 if ((nmp
->nm_flag
& NFSMNT_KERB
) && verf_type
== RPCAUTH_KERB4
) {
1418 error
= nfs_savenickauth(nmp
, cred
, i
, key
, &md
, &dpos
, mrep
);
1422 nfsm_adv(nfsm_rndup(i
));
1423 nfsm_dissect(tl
, u_long
*, NFSX_UNSIGNED
);
1426 nfsm_dissect(tl
, u_long
*, NFSX_UNSIGNED
);
1428 error
= fxdr_unsigned(int, *tl
);
1429 if ((nmp
->nm_flag
& NFSMNT_NFSV3
) &&
1430 error
== NFSERR_TRYLATER
) {
1433 waituntil
= time
.tv_sec
+ trylater_delay
;
1435 ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
1436 nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
1437 nmp
->nm_flag
, trylater_cnt
, waituntil
,
1439 while (time
.tv_sec
< waituntil
)
1440 (void)tsleep((caddr_t
)&lbolt
,
1441 PSOCK
, "nqnfstry", 0);
1442 trylater_delay
*= nfs_backoff
[trylater_cnt
];
1443 if (trylater_cnt
< 7)
1449 * If the File Handle was stale, invalidate the
1450 * lookup cache, just in case.
1452 if (error
== ESTALE
)
1454 if (nmp
->nm_flag
& NFSMNT_NFSV3
) {
1458 error
|= NFSERR_RETERR
;
1461 m_freem(rep
->r_mreq
);
1462 FSDBG_BOT(531, error
, rep
->r_xid
, nmp
, rep
);
1463 _FREE_ZONE((caddr_t
)rep
,
1464 sizeof (struct nfsreq
), M_NFSREQ
);
1469 * For nqnfs, get any lease in reply
1471 if (nmp
->nm_flag
& NFSMNT_NQNFS
) {
1472 nfsm_dissect(tl
, u_long
*, NFSX_UNSIGNED
);
1475 nqlflag
= fxdr_unsigned(int, *tl
);
1476 nfsm_dissect(tl
, u_long
*, 4*NFSX_UNSIGNED
);
1477 cachable
= fxdr_unsigned(int, *tl
++);
1478 reqtime
+= fxdr_unsigned(int, *tl
++);
1479 if (reqtime
> time
.tv_sec
) {
1480 fxdr_hyper(tl
, &frev
);
1481 nqnfs_clientlease(nmp
, np
, nqlflag
,
1482 cachable
, reqtime
, frev
);
1489 m_freem(rep
->r_mreq
);
1490 FSDBG_BOT(531, 0xf0f0f0f0, rep
->r_xid
, nmp
, rep
);
1491 FREE_ZONE((caddr_t
)rep
, sizeof (struct nfsreq
), M_NFSREQ
);
1495 error
= EPROTONOSUPPORT
;
1497 m_freem(rep
->r_mreq
);
1498 FSDBG_BOT(531, error
, rep
->r_xid
, nmp
, rep
);
1499 _FREE_ZONE((caddr_t
)rep
, sizeof (struct nfsreq
), M_NFSREQ
);
1503 #ifndef NFS_NOSERVER
1505 * Generate the rpc reply header
1506 * siz arg. is used to decide if adding a cluster is worthwhile
1509 nfs_rephead(siz
, nd
, slp
, err
, cache
, frev
, mrq
, mbp
, bposp
)
1511 struct nfsrv_descript
*nd
;
1512 struct nfssvc_sock
*slp
;
1520 register u_long
*tl
;
1521 register struct mbuf
*mreq
;
1523 struct mbuf
*mb
, *mb2
;
1525 MGETHDR(mreq
, M_WAIT
, MT_DATA
);
1528 * If this is a big reply, use a cluster else
1529 * try and leave leading space for the lower level headers.
1531 siz
+= RPC_REPLYSIZ
;
1532 if (siz
>= MINCLSIZE
) {
1533 MCLGET(mreq
, M_WAIT
);
1535 mreq
->m_data
+= max_hdr
;
1536 tl
= mtod(mreq
, u_long
*);
1537 mreq
->m_len
= 6 * NFSX_UNSIGNED
;
1538 bpos
= ((caddr_t
)tl
) + mreq
->m_len
;
1539 *tl
++ = txdr_unsigned(nd
->nd_retxid
);
1541 if (err
== ERPCMISMATCH
|| (err
& NFSERR_AUTHERR
)) {
1542 *tl
++ = rpc_msgdenied
;
1543 if (err
& NFSERR_AUTHERR
) {
1544 *tl
++ = rpc_autherr
;
1545 *tl
= txdr_unsigned(err
& ~NFSERR_AUTHERR
);
1546 mreq
->m_len
-= NFSX_UNSIGNED
;
1547 bpos
-= NFSX_UNSIGNED
;
1549 *tl
++ = rpc_mismatch
;
1550 *tl
++ = txdr_unsigned(RPC_VER2
);
1551 *tl
= txdr_unsigned(RPC_VER2
);
1554 *tl
++ = rpc_msgaccepted
;
1557 * For Kerberos authentication, we must send the nickname
1558 * verifier back, otherwise just RPCAUTH_NULL.
1560 if (nd
->nd_flag
& ND_KERBFULL
) {
1561 register struct nfsuid
*nuidp
;
1562 struct timeval ktvin
, ktvout
;
1564 for (nuidp
= NUIDHASH(slp
, nd
->nd_cr
.cr_uid
)->lh_first
;
1565 nuidp
!= 0; nuidp
= nuidp
->nu_hash
.le_next
) {
1566 if (nuidp
->nu_cr
.cr_uid
== nd
->nd_cr
.cr_uid
&&
1567 (!nd
->nd_nam2
|| netaddr_match(NU_NETFAM(nuidp
),
1568 &nuidp
->nu_haddr
, nd
->nd_nam2
)))
1573 txdr_unsigned(nuidp
->nu_timestamp
.tv_sec
- 1);
1575 txdr_unsigned(nuidp
->nu_timestamp
.tv_usec
);
1578 * Encrypt the timestamp in ecb mode using the
1585 *tl
++ = rpc_auth_kerb
;
1586 *tl
++ = txdr_unsigned(3 * NFSX_UNSIGNED
);
1587 *tl
= ktvout
.tv_sec
;
1588 nfsm_build(tl
, u_long
*, 3 * NFSX_UNSIGNED
);
1589 *tl
++ = ktvout
.tv_usec
;
1590 *tl
++ = txdr_unsigned(nuidp
->nu_cr
.cr_uid
);
1601 *tl
= txdr_unsigned(RPC_PROGUNAVAIL
);
1604 *tl
= txdr_unsigned(RPC_PROGMISMATCH
);
1605 nfsm_build(tl
, u_long
*, 2 * NFSX_UNSIGNED
);
1606 if (nd
->nd_flag
& ND_NQNFS
) {
1607 *tl
++ = txdr_unsigned(3);
1608 *tl
= txdr_unsigned(3);
1610 *tl
++ = txdr_unsigned(2);
1611 *tl
= txdr_unsigned(3);
1615 *tl
= txdr_unsigned(RPC_PROCUNAVAIL
);
1618 *tl
= txdr_unsigned(RPC_GARBAGE
);
1622 if (err
!= NFSERR_RETVOID
) {
1623 nfsm_build(tl
, u_long
*, NFSX_UNSIGNED
);
1625 *tl
= txdr_unsigned(nfsrv_errmap(nd
, err
));
1634 * For nqnfs, piggyback lease as requested.
1636 if ((nd
->nd_flag
& ND_NQNFS
) && err
== 0) {
1637 if (nd
->nd_flag
& ND_LEASE
) {
1638 nfsm_build(tl
, u_long
*, 5 * NFSX_UNSIGNED
);
1639 *tl
++ = txdr_unsigned(nd
->nd_flag
& ND_LEASE
);
1640 *tl
++ = txdr_unsigned(cache
);
1641 *tl
++ = txdr_unsigned(nd
->nd_duration
);
1642 txdr_hyper(frev
, tl
);
1644 nfsm_build(tl
, u_long
*, NFSX_UNSIGNED
);
1652 if (err
!= 0 && err
!= NFSERR_RETVOID
)
1653 nfsstats
.srvrpc_errs
++;
1658 #endif /* NFS_NOSERVER */
1662 * From FreeBSD 1.58, a Matt Dillon fix...
1663 * Flag a request as being about to terminate.
1664 * The nm_sent count is decremented now to avoid deadlocks when the process
1665 * in soreceive() hasn't yet managed to send its own request.
1668 nfs_softterm(struct nfsreq
*rep
)
1670 rep
->r_flags
|= R_SOFTTERM
;
1671 if (rep
->r_flags
& R_SENT
) {
1672 FSDBG(532, rep
->r_xid
, rep
, rep
->r_nmp
->nm_sent
,
1673 rep
->r_nmp
->nm_cwnd
);
1674 rep
->r_nmp
->nm_sent
-= NFS_CWNDSCALE
;
1675 rep
->r_flags
&= ~R_SENT
;
1680 nfs_timer_funnel(arg
)
1683 (void) thread_funnel_set(kernel_flock
, TRUE
);
1685 (void) thread_funnel_set(kernel_flock
, FALSE
);
1691 * Scan the nfsreq list and retransmit any requests that have timed out
1692 * To avoid retransmission attempts on STREAM sockets (in the future) make
1693 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1697 void *arg
; /* never used */
1699 register struct nfsreq
*rep
, *rp
;
1700 register struct mbuf
*m
;
1701 register struct socket
*so
;
1702 register struct nfsmount
*nmp
;
1705 #ifndef NFS_NOSERVER
1706 static long lasttime
= 0;
1707 register struct nfssvc_sock
*slp
;
1709 #endif /* NFS_NOSERVER */
1713 int flags
, rexmit
, cwnd
, sent
;
1718 * XXX If preemptable threads are implemented the spls used for the
1719 * outstanding request queue must be replaced with mutexes.
1722 #ifdef NFSTRACESUSPENDERS
1723 if (NFSTRACE_SUSPENDING
) {
1724 for (rep
= nfs_reqq
.tqh_first
; rep
!= 0;
1725 rep
= rep
->r_chain
.tqe_next
)
1726 if (rep
->r_xid
== nfstracexid
)
1730 } else if (NFSTRACE_SUSPENSEOVER
) {
1735 for (rep
= nfs_reqq
.tqh_first
; rep
!= 0; rep
= rep
->r_chain
.tqe_next
) {
1736 #ifdef NFSTRACESUSPENDERS
1737 if (rep
->r_mrep
&& !NFSTRACE_SUSPENDING
) {
1738 nfstracexid
= rep
->r_xid
;
1739 NFSTRACE_STARTSUSPENDCOUNTDOWN
;
1743 if (!nmp
) /* unmounted */
1745 if (rep
->r_mrep
|| (rep
->r_flags
& R_SOFTTERM
))
1747 if (nfs_sigintr(nmp
, rep
, rep
->r_procp
)) {
1751 if (rep
->r_rtt
>= 0) {
1753 if (nmp
->nm_flag
& NFSMNT_DUMBTIMR
)
1754 timeo
= nmp
->nm_timeo
;
1756 timeo
= NFS_RTO(nmp
, proct
[rep
->r_procnum
]);
1757 /* ensure 62.5 ms floor */
1758 while (16 * timeo
< hz
)
1760 if (nmp
->nm_timeouts
> 0)
1761 timeo
*= nfs_backoff
[nmp
->nm_timeouts
- 1];
1762 if (rep
->r_rtt
<= timeo
)
1764 if (nmp
->nm_timeouts
< 8)
1768 * Check for server not responding
1770 if ((rep
->r_flags
& R_TPRINTFMSG
) == 0 &&
1771 rep
->r_rexmit
> nmp
->nm_deadthresh
) {
1772 nfs_msg(rep
->r_procp
,
1773 nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
1775 rep
->r_flags
|= R_TPRINTFMSG
;
1777 if (rep
->r_rexmit
>= rep
->r_retry
) { /* too many */
1778 nfsstats
.rpctimeouts
++;
1782 if (nmp
->nm_sotype
!= SOCK_DGRAM
) {
1783 if (++rep
->r_rexmit
> NFS_MAXREXMIT
)
1784 rep
->r_rexmit
= NFS_MAXREXMIT
;
1787 if ((so
= nmp
->nm_so
) == NULL
)
1791 * If there is enough space and the window allows..
1793 * Set r_rtt to -1 in case we fail to send it now.
1796 rttdiag
= rep
->r_rtt
;
1799 if (sbspace(&so
->so_snd
) >= rep
->r_mreq
->m_pkthdr
.len
&&
1800 ((nmp
->nm_flag
& NFSMNT_DUMBTIMR
) ||
1801 (rep
->r_flags
& R_SENT
) ||
1802 nmp
->nm_sent
< nmp
->nm_cwnd
) &&
1803 (m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, M_DONTWAIT
))){
1805 struct proc
*p
= current_proc();
1808 if (rep
->r_flags
& R_SENT
&& nfsprnttimo
&&
1809 nmp
->nm_timeouts
>= nfsprnttimo
) {
1810 int t
= proct
[rep
->r_procnum
];
1812 NFS_DPF(DUP
, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp
->nm_mountp
->mnt_stat
.f_mntfromname
, nmp
->nm_timeo
, nmp
->nm_timeouts
, rttdiag
, timeo
, rep
->r_procnum
, nmp
->nm_srtt
[t
-1], nmp
->nm_sdrtt
[t
-1]));
1814 NFS_DPF(DUP
, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp
->nm_mountp
->mnt_stat
.f_mntfromname
, nmp
->nm_timeo
, nmp
->nm_timeouts
, rttdiag
, timeo
, rep
->r_procnum
));
1817 #endif /* NFSDIAG */
1819 * Iff first send, start timing
1820 * else turn timing off, backoff timer
1821 * and divide congestion window by 2.
1822 * We update these *before* the send to avoid
1823 * racing against receiving the reply.
1824 * We save them so we can restore them on send error.
1826 flags
= rep
->r_flags
;
1827 rexmit
= rep
->r_rexmit
;
1828 cwnd
= nmp
->nm_cwnd
;
1829 sent
= nmp
->nm_sent
;
1831 if (rep
->r_flags
& R_SENT
) {
1832 rep
->r_flags
&= ~R_TIMING
;
1833 if (++rep
->r_rexmit
> NFS_MAXREXMIT
)
1834 rep
->r_rexmit
= NFS_MAXREXMIT
;
1836 if (nmp
->nm_cwnd
< NFS_CWNDSCALE
)
1837 nmp
->nm_cwnd
= NFS_CWNDSCALE
;
1838 nfsstats
.rpcretries
++;
1840 rep
->r_flags
|= R_SENT
;
1841 nmp
->nm_sent
+= NFS_CWNDSCALE
;
1843 FSDBG(535, xid
, rep
, nmp
->nm_sent
, nmp
->nm_cwnd
);
1845 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
1847 if ((nmp
->nm_flag
& NFSMNT_NOCONN
) == 0)
1848 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
1849 (so
, 0, m
, 0, 0, p
);
1851 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
1852 (so
, 0, m
, mtod(nmp
->nm_nam
, struct sockaddr
*), 0, p
);
1854 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
1856 FSDBG(535, xid
, error
, sent
, cwnd
);
1858 * This is to fix "nfs_sigintr" DSI panics.
1859 * We may have slept during the send so the current
1860 * place in the request queue may have been released.
1861 * Due to zone_gc it may even be part of an
1862 * unrelated newly allocated data structure.
1863 * Restart the list scan from the top if needed...
1865 for (rp
= nfs_reqq
.tqh_first
; rp
;
1866 rp
= rp
->r_chain
.tqe_next
)
1867 if (rp
== rep
&& rp
->r_xid
== xid
)
1872 panic("nfs_timer: race error %d xid 0x%x\n",
1877 if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
))
1879 rep
->r_flags
= flags
;
1880 rep
->r_rexmit
= rexmit
;
1881 nmp
->nm_cwnd
= cwnd
;
1882 nmp
->nm_sent
= sent
;
1884 nfsstats
.rpcretries
--;
1889 #ifndef NFS_NOSERVER
1891 * Call the nqnfs server timer once a second to handle leases.
1893 if (lasttime
!= time
.tv_sec
) {
1894 lasttime
= time
.tv_sec
;
1899 * Scan the write gathering queues for writes that need to be
1902 cur_usec
= (u_quad_t
)time
.tv_sec
* 1000000 + (u_quad_t
)time
.tv_usec
;
1903 for (slp
= nfssvc_sockhead
.tqh_first
; slp
!= 0;
1904 slp
= slp
->ns_chain
.tqe_next
) {
1905 if (slp
->ns_tq
.lh_first
&& slp
->ns_tq
.lh_first
->nd_time
<=cur_usec
)
1906 nfsrv_wakenfsd(slp
);
1908 #endif /* NFS_NOSERVER */
1910 timeout(nfs_timer_funnel
, (void *)0, nfs_ticks
);
1916 * Test for a termination condition pending on the process.
1917 * This is used for NFSMNT_INT mounts.
1920 nfs_sigintr(nmp
, rep
, p
)
1921 struct nfsmount
*nmp
;
1923 register struct proc
*p
;
1928 ut
= (struct uthread
*)get_bsdthread_info(current_act());
1930 if (rep
&& (rep
->r_flags
& R_SOFTTERM
))
1932 if (!(nmp
->nm_flag
& NFSMNT_INT
))
1934 if (p
&& ut
&& ut
->uu_siglist
&&
1935 (((ut
->uu_siglist
& ~ut
->uu_sigmask
) & ~p
->p_sigignore
) &
1942 * Lock a socket against others.
1943 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1944 * and also to avoid race conditions between the processes with nfs requests
1945 * in progress when a reconnect is necessary.
1948 nfs_sndlock(flagp
, rep
)
1949 register int *flagp
;
1953 int slpflag
= 0, slptimeo
= 0;
1957 if (rep
->r_nmp
->nm_flag
& NFSMNT_INT
)
1960 p
= (struct proc
*)0;
1961 while (*flagp
& NFSMNT_SNDLOCK
) {
1962 if (nfs_sigintr(rep
->r_nmp
, rep
, p
))
1964 *flagp
|= NFSMNT_WANTSND
;
1965 (void) tsleep((caddr_t
)flagp
, slpflag
| (PZERO
- 1), "nfsndlck",
1967 if (slpflag
== PCATCH
) {
1972 * Make sure while we slept that the mountpoint didn't go away.
1973 * nfs_sigintr and callers expect it in tact.
1976 return (ECONNABORTED
); /* don't have lock until out of loop */
1978 *flagp
|= NFSMNT_SNDLOCK
;
1983 * Unlock the stream socket for others.
1986 nfs_sndunlock(flagp
)
1987 register int *flagp
;
1990 if ((*flagp
& NFSMNT_SNDLOCK
) == 0)
1991 panic("nfs sndunlock");
1992 *flagp
&= ~NFSMNT_SNDLOCK
;
1993 if (*flagp
& NFSMNT_WANTSND
) {
1994 *flagp
&= ~NFSMNT_WANTSND
;
1995 wakeup((caddr_t
)flagp
);
2001 register struct nfsreq
*rep
;
2003 register int *flagp
;
2004 int slpflag
, slptimeo
= 0;
2006 /* make sure we still have our mountpoint */
2008 if (rep
->r_mrep
!= NULL
)
2010 return (ECONNABORTED
);
2013 flagp
= &rep
->r_nmp
->nm_flag
;
2014 FSDBG_TOP(534, rep
->r_xid
, rep
, rep
->r_nmp
, *flagp
);
2015 if (*flagp
& NFSMNT_INT
)
2019 while (*flagp
& NFSMNT_RCVLOCK
) {
2020 if (nfs_sigintr(rep
->r_nmp
, rep
, rep
->r_procp
)) {
2021 FSDBG_BOT(534, rep
->r_xid
, rep
, rep
->r_nmp
, 0x100);
2023 } else if (rep
->r_mrep
!= NULL
) {
2025 * Don't bother sleeping if reply already arrived
2027 FSDBG_BOT(534, rep
->r_xid
, rep
, rep
->r_nmp
, 0x101);
2030 FSDBG(534, rep
->r_xid
, rep
, rep
->r_nmp
, 0x102);
2031 *flagp
|= NFSMNT_WANTRCV
;
2032 (void) tsleep((caddr_t
)flagp
, slpflag
| (PZERO
- 1), "nfsrcvlk",
2034 if (slpflag
== PCATCH
) {
2039 * Make sure while we slept that the mountpoint didn't go away.
2040 * nfs_sigintr and caller nfs_reply expect it intact.
2043 FSDBG_BOT(534, rep
->r_xid
, rep
, rep
->r_nmp
, 0x103);
2044 return (ECONNABORTED
); /* don't have lock until out of loop */
2048 * nfs_reply will handle it if reply already arrived.
2049 * (We may have slept or been preempted while on network funnel).
2051 FSDBG_BOT(534, rep
->r_xid
, rep
, rep
->r_nmp
, *flagp
);
2052 *flagp
|= NFSMNT_RCVLOCK
;
2057 * Unlock the stream socket for others.
2060 nfs_rcvunlock(flagp
)
2061 register int *flagp
;
2064 FSDBG(533, flagp
, *flagp
, 0, 0);
2065 if ((*flagp
& NFSMNT_RCVLOCK
) == 0)
2066 panic("nfs rcvunlock");
2067 *flagp
&= ~NFSMNT_RCVLOCK
;
2068 if (*flagp
& NFSMNT_WANTRCV
) {
2069 *flagp
&= ~NFSMNT_WANTRCV
;
2070 wakeup((caddr_t
)flagp
);
2075 #ifndef NFS_NOSERVER
2077 * Socket upcall routine for the nfsd sockets.
2078 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2079 * Essentially do as much as possible non-blocking, else punt and it will
2080 * be called with M_WAIT from an nfsd.
2083 * Needs to run under the network funnel
2086 nfsrv_rcv(so
, arg
, waitflag
)
2091 register struct nfssvc_sock
*slp
= (struct nfssvc_sock
*)arg
;
2092 register struct mbuf
*m
;
2093 struct mbuf
*mp
, *mhck
;
2094 struct sockaddr
*nam
=0;
2097 struct sockaddr_in
*sin
;
2099 if ((slp
->ns_flag
& SLP_VALID
) == 0)
2103 * Define this to test for nfsds handling this under heavy load.
2105 if (waitflag
== M_DONTWAIT
) {
2106 slp
->ns_flag
|= SLP_NEEDQ
; goto dorecs
;
2109 auio
.uio_procp
= NULL
;
2110 if (so
->so_type
== SOCK_STREAM
) {
2112 * If there are already records on the queue, defer soreceive()
2113 * to an nfsd so that there is feedback to the TCP layer that
2114 * the nfs servers are heavily loaded.
2116 if (slp
->ns_rec
&& waitflag
== M_DONTWAIT
) {
2117 slp
->ns_flag
|= SLP_NEEDQ
;
2124 auio
.uio_resid
= 1000000000;
2125 flags
= MSG_DONTWAIT
;
2126 error
= soreceive(so
, (struct sockaddr
**) 0, &auio
, &mp
, (struct mbuf
**)0, &flags
);
2127 if (error
|| mp
== (struct mbuf
*)0) {
2128 if (error
== EWOULDBLOCK
)
2129 slp
->ns_flag
|= SLP_NEEDQ
;
2131 slp
->ns_flag
|= SLP_DISCONN
;
2135 if (slp
->ns_rawend
) {
2136 slp
->ns_rawend
->m_next
= m
;
2137 slp
->ns_cc
+= 1000000000 - auio
.uio_resid
;
2140 slp
->ns_cc
= 1000000000 - auio
.uio_resid
;
2147 * Now try and parse record(s) out of the raw stream data.
2149 error
= nfsrv_getstream(slp
, waitflag
);
2152 slp
->ns_flag
|= SLP_DISCONN
;
2154 slp
->ns_flag
|= SLP_NEEDQ
;
2158 auio
.uio_resid
= 1000000000;
2159 flags
= MSG_DONTWAIT
;
2161 error
= soreceive(so
, &nam
, &auio
, &mp
,
2162 (struct mbuf
**)0, &flags
);
2166 MGET(mhck
, M_WAIT
, MT_SONAME
);
2167 mhck
->m_len
= nam
->sa_len
;
2168 sin
= mtod(mhck
, struct sockaddr_in
*);
2169 bcopy(nam
, sin
, sizeof(struct sockaddr_in
));
2170 mhck
->m_hdr
.mh_len
= sizeof(struct sockaddr_in
);
2171 FREE(nam
, M_SONAME
);
2178 slp
->ns_recend
->m_nextpkt
= m
;
2182 m
->m_nextpkt
= (struct mbuf
*)0;
2185 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)
2186 && error
!= EWOULDBLOCK
) {
2187 slp
->ns_flag
|= SLP_DISCONN
;
2195 * Now try and process the request records, non-blocking.
2198 if (waitflag
== M_DONTWAIT
&&
2199 (slp
->ns_rec
|| (slp
->ns_flag
& (SLP_NEEDQ
| SLP_DISCONN
)))) {
2200 thread_funnel_switch(NETWORK_FUNNEL
, KERNEL_FUNNEL
);
2201 nfsrv_wakenfsd(slp
);
2202 thread_funnel_switch(KERNEL_FUNNEL
, NETWORK_FUNNEL
);
2207 * Try and extract an RPC request from the mbuf data list received on a
2208 * stream socket. The "waitflag" argument indicates whether or not it
2212 nfsrv_getstream(slp
, waitflag
)
2213 register struct nfssvc_sock
*slp
;
2216 register struct mbuf
*m
, **mpp
;
2217 register char *cp1
, *cp2
;
2219 struct mbuf
*om
, *m2
, *recm
;
2222 if (slp
->ns_flag
& SLP_GETSTREAM
)
2223 panic("nfs getstream");
2224 slp
->ns_flag
|= SLP_GETSTREAM
;
2226 if (slp
->ns_reclen
== 0) {
2227 if (slp
->ns_cc
< NFSX_UNSIGNED
) {
2228 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2232 if (m
->m_len
>= NFSX_UNSIGNED
) {
2233 bcopy(mtod(m
, caddr_t
), (caddr_t
)&recmark
, NFSX_UNSIGNED
);
2234 m
->m_data
+= NFSX_UNSIGNED
;
2235 m
->m_len
-= NFSX_UNSIGNED
;
2237 cp1
= (caddr_t
)&recmark
;
2238 cp2
= mtod(m
, caddr_t
);
2239 while (cp1
< ((caddr_t
)&recmark
) + NFSX_UNSIGNED
) {
2240 while (m
->m_len
== 0) {
2242 cp2
= mtod(m
, caddr_t
);
2249 slp
->ns_cc
-= NFSX_UNSIGNED
;
2250 recmark
= ntohl(recmark
);
2251 slp
->ns_reclen
= recmark
& ~0x80000000;
2252 if (recmark
& 0x80000000)
2253 slp
->ns_flag
|= SLP_LASTFRAG
;
2255 slp
->ns_flag
&= ~SLP_LASTFRAG
;
2256 if (slp
->ns_reclen
< NFS_MINPACKET
|| slp
->ns_reclen
> NFS_MAXPACKET
) {
2257 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2263 * Now get the record part.
2265 * Note that slp->ns_reclen may be 0. Linux sometimes
2266 * generates 0-length RPCs
2269 if (slp
->ns_cc
== slp
->ns_reclen
) {
2271 slp
->ns_raw
= slp
->ns_rawend
= (struct mbuf
*)0;
2272 slp
->ns_cc
= slp
->ns_reclen
= 0;
2273 } else if (slp
->ns_cc
> slp
->ns_reclen
) {
2276 om
= (struct mbuf
*)0;
2277 while (len
< slp
->ns_reclen
) {
2278 if ((len
+ m
->m_len
) > slp
->ns_reclen
) {
2279 m2
= m_copym(m
, 0, slp
->ns_reclen
- len
,
2287 m
->m_data
+= slp
->ns_reclen
- len
;
2288 m
->m_len
-= slp
->ns_reclen
- len
;
2289 len
= slp
->ns_reclen
;
2291 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2292 return (EWOULDBLOCK
);
2294 } else if ((len
+ m
->m_len
) == slp
->ns_reclen
) {
2299 om
->m_next
= (struct mbuf
*)0;
2310 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2315 * Accumulate the fragments into a record.
2317 mpp
= &slp
->ns_frag
;
2319 mpp
= &((*mpp
)->m_next
);
2321 if (slp
->ns_flag
& SLP_LASTFRAG
) {
2323 slp
->ns_recend
->m_nextpkt
= slp
->ns_frag
;
2325 slp
->ns_rec
= slp
->ns_frag
;
2326 slp
->ns_recend
= slp
->ns_frag
;
2327 slp
->ns_frag
= (struct mbuf
*)0;
2333 * Parse an RPC header.
2336 nfsrv_dorec(slp
, nfsd
, ndp
)
2337 register struct nfssvc_sock
*slp
;
2339 struct nfsrv_descript
**ndp
;
2341 register struct mbuf
*m
;
2342 register struct mbuf
*nam
;
2343 register struct nfsrv_descript
*nd
;
2347 if ((slp
->ns_flag
& SLP_VALID
) == 0 ||
2348 (m
= slp
->ns_rec
) == (struct mbuf
*)0)
2350 slp
->ns_rec
= m
->m_nextpkt
;
2352 m
->m_nextpkt
= (struct mbuf
*)0;
2354 slp
->ns_recend
= (struct mbuf
*)0;
2355 if (m
->m_type
== MT_SONAME
) {
2361 MALLOC_ZONE(nd
, struct nfsrv_descript
*,
2362 sizeof (struct nfsrv_descript
), M_NFSRVDESC
, M_WAITOK
);
2363 nd
->nd_md
= nd
->nd_mrep
= m
;
2365 nd
->nd_dpos
= mtod(m
, caddr_t
);
2366 error
= nfs_getreq(nd
, nfsd
, TRUE
);
2369 _FREE_ZONE((caddr_t
)nd
, sizeof *nd
, M_NFSRVDESC
);
2378 * Parse an RPC request
2380 * - fill in the cred struct.
2383 nfs_getreq(nd
, nfsd
, has_header
)
2384 register struct nfsrv_descript
*nd
;
2388 register int len
, i
;
2389 register u_long
*tl
;
2393 caddr_t dpos
, cp2
, cp
;
2394 u_long nfsvers
, auth_type
;
2396 int error
= 0, nqnfs
= 0, ticklen
;
2397 struct mbuf
*mrep
, *md
;
2398 register struct nfsuid
*nuidp
;
2399 struct timeval tvin
, tvout
;
2400 #if 0 /* until encrypted keys are implemented */
2401 NFSKERBKEYSCHED_T keys
; /* stores key schedule */
2408 nfsm_dissect(tl
, u_long
*, 10 * NFSX_UNSIGNED
);
2409 nd
->nd_retxid
= fxdr_unsigned(u_long
, *tl
++);
2410 if (*tl
++ != rpc_call
) {
2415 nfsm_dissect(tl
, u_long
*, 8 * NFSX_UNSIGNED
);
2418 if (*tl
++ != rpc_vers
) {
2419 nd
->nd_repstat
= ERPCMISMATCH
;
2420 nd
->nd_procnum
= NFSPROC_NOOP
;
2423 if (*tl
!= nfs_prog
) {
2424 if (*tl
== nqnfs_prog
)
2427 nd
->nd_repstat
= EPROGUNAVAIL
;
2428 nd
->nd_procnum
= NFSPROC_NOOP
;
2433 nfsvers
= fxdr_unsigned(u_long
, *tl
++);
2434 if (((nfsvers
< NFS_VER2
|| nfsvers
> NFS_VER3
) && !nqnfs
) ||
2435 (nfsvers
!= NQNFS_VER3
&& nqnfs
)) {
2436 nd
->nd_repstat
= EPROGMISMATCH
;
2437 nd
->nd_procnum
= NFSPROC_NOOP
;
2441 nd
->nd_flag
= (ND_NFSV3
| ND_NQNFS
);
2442 else if (nfsvers
== NFS_VER3
)
2443 nd
->nd_flag
= ND_NFSV3
;
2444 nd
->nd_procnum
= fxdr_unsigned(u_long
, *tl
++);
2445 if (nd
->nd_procnum
== NFSPROC_NULL
)
2447 if (nd
->nd_procnum
>= NFS_NPROCS
||
2448 (!nqnfs
&& nd
->nd_procnum
>= NQNFSPROC_GETLEASE
) ||
2449 (!nd
->nd_flag
&& nd
->nd_procnum
> NFSV2PROC_STATFS
)) {
2450 nd
->nd_repstat
= EPROCUNAVAIL
;
2451 nd
->nd_procnum
= NFSPROC_NOOP
;
2454 if ((nd
->nd_flag
& ND_NFSV3
) == 0)
2455 nd
->nd_procnum
= nfsv3_procid
[nd
->nd_procnum
];
2457 len
= fxdr_unsigned(int, *tl
++);
2458 if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
2463 nd
->nd_flag
&= ~ND_KERBAUTH
;
2465 * Handle auth_unix or auth_kerb.
2467 if (auth_type
== rpc_auth_unix
) {
2468 len
= fxdr_unsigned(int, *++tl
);
2469 if (len
< 0 || len
> NFS_MAXNAMLEN
) {
2473 nfsm_adv(nfsm_rndup(len
));
2474 nfsm_dissect(tl
, u_long
*, 3 * NFSX_UNSIGNED
);
2475 bzero((caddr_t
)&nd
->nd_cr
, sizeof (struct ucred
));
2476 nd
->nd_cr
.cr_ref
= 1;
2477 nd
->nd_cr
.cr_uid
= fxdr_unsigned(uid_t
, *tl
++);
2478 nd
->nd_cr
.cr_gid
= fxdr_unsigned(gid_t
, *tl
++);
2479 len
= fxdr_unsigned(int, *tl
);
2480 if (len
< 0 || len
> RPCAUTH_UNIXGIDS
) {
2484 nfsm_dissect(tl
, u_long
*, (len
+ 2) * NFSX_UNSIGNED
);
2485 for (i
= 1; i
<= len
; i
++)
2487 nd
->nd_cr
.cr_groups
[i
] = fxdr_unsigned(gid_t
, *tl
++);
2490 nd
->nd_cr
.cr_ngroups
= (len
>= NGROUPS
) ? NGROUPS
: (len
+ 1);
2491 if (nd
->nd_cr
.cr_ngroups
> 1)
2492 nfsrvw_sort(nd
->nd_cr
.cr_groups
, nd
->nd_cr
.cr_ngroups
);
2493 len
= fxdr_unsigned(int, *++tl
);
2494 if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
2499 nfsm_adv(nfsm_rndup(len
));
2500 } else if (auth_type
== rpc_auth_kerb
) {
2501 switch (fxdr_unsigned(int, *tl
++)) {
2502 case RPCAKN_FULLNAME
:
2503 ticklen
= fxdr_unsigned(int, *tl
);
2504 *((u_long
*)nfsd
->nfsd_authstr
) = *tl
;
2505 uio
.uio_resid
= nfsm_rndup(ticklen
) + NFSX_UNSIGNED
;
2506 nfsd
->nfsd_authlen
= uio
.uio_resid
+ NFSX_UNSIGNED
;
2507 if (uio
.uio_resid
> (len
- 2 * NFSX_UNSIGNED
)) {
2514 uio
.uio_segflg
= UIO_SYSSPACE
;
2515 iov
.iov_base
= (caddr_t
)&nfsd
->nfsd_authstr
[4];
2516 iov
.iov_len
= RPCAUTH_MAXSIZ
- 4;
2517 nfsm_mtouio(&uio
, uio
.uio_resid
);
2518 nfsm_dissect(tl
, u_long
*, 2 * NFSX_UNSIGNED
);
2519 if (*tl
++ != rpc_auth_kerb
||
2520 fxdr_unsigned(int, *tl
) != 4 * NFSX_UNSIGNED
) {
2521 printf("Bad kerb verifier\n");
2522 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
2523 nd
->nd_procnum
= NFSPROC_NOOP
;
2526 nfsm_dissect(cp
, caddr_t
, 4 * NFSX_UNSIGNED
);
2528 if (fxdr_unsigned(int, *tl
) != RPCAKN_FULLNAME
) {
2529 printf("Not fullname kerb verifier\n");
2530 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
2531 nd
->nd_procnum
= NFSPROC_NOOP
;
2534 cp
+= NFSX_UNSIGNED
;
2535 bcopy(cp
, nfsd
->nfsd_verfstr
, 3 * NFSX_UNSIGNED
);
2536 nfsd
->nfsd_verflen
= 3 * NFSX_UNSIGNED
;
2537 nd
->nd_flag
|= ND_KERBFULL
;
2538 nfsd
->nfsd_flag
|= NFSD_NEEDAUTH
;
2540 case RPCAKN_NICKNAME
:
2541 if (len
!= 2 * NFSX_UNSIGNED
) {
2542 printf("Kerb nickname short\n");
2543 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADCRED
);
2544 nd
->nd_procnum
= NFSPROC_NOOP
;
2547 nickuid
= fxdr_unsigned(uid_t
, *tl
);
2548 nfsm_dissect(tl
, u_long
*, 2 * NFSX_UNSIGNED
);
2549 if (*tl
++ != rpc_auth_kerb
||
2550 fxdr_unsigned(int, *tl
) != 3 * NFSX_UNSIGNED
) {
2551 printf("Kerb nick verifier bad\n");
2552 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
2553 nd
->nd_procnum
= NFSPROC_NOOP
;
2556 nfsm_dissect(tl
, u_long
*, 3 * NFSX_UNSIGNED
);
2557 tvin
.tv_sec
= *tl
++;
2560 for (nuidp
= NUIDHASH(nfsd
->nfsd_slp
,nickuid
)->lh_first
;
2561 nuidp
!= 0; nuidp
= nuidp
->nu_hash
.le_next
) {
2562 if (nuidp
->nu_cr
.cr_uid
== nickuid
&&
2564 netaddr_match(NU_NETFAM(nuidp
),
2565 &nuidp
->nu_haddr
, nd
->nd_nam2
)))
2570 (NFSERR_AUTHERR
|AUTH_REJECTCRED
);
2571 nd
->nd_procnum
= NFSPROC_NOOP
;
2576 * Now, decrypt the timestamp using the session key
2583 tvout
.tv_sec
= fxdr_unsigned(long, tvout
.tv_sec
);
2584 tvout
.tv_usec
= fxdr_unsigned(long, tvout
.tv_usec
);
2585 if (nuidp
->nu_expire
< time
.tv_sec
||
2586 nuidp
->nu_timestamp
.tv_sec
> tvout
.tv_sec
||
2587 (nuidp
->nu_timestamp
.tv_sec
== tvout
.tv_sec
&&
2588 nuidp
->nu_timestamp
.tv_usec
> tvout
.tv_usec
)) {
2589 nuidp
->nu_expire
= 0;
2591 (NFSERR_AUTHERR
|AUTH_REJECTVERF
);
2592 nd
->nd_procnum
= NFSPROC_NOOP
;
2595 nfsrv_setcred(&nuidp
->nu_cr
, &nd
->nd_cr
);
2596 nd
->nd_flag
|= ND_KERBNICK
;
2599 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_REJECTCRED
);
2600 nd
->nd_procnum
= NFSPROC_NOOP
;
2605 * For nqnfs, get piggybacked lease request.
2607 if (nqnfs
&& nd
->nd_procnum
!= NQNFSPROC_EVICTED
) {
2608 nfsm_dissect(tl
, u_long
*, NFSX_UNSIGNED
);
2609 nd
->nd_flag
|= fxdr_unsigned(int, *tl
);
2610 if (nd
->nd_flag
& ND_LEASE
) {
2611 nfsm_dissect(tl
, u_long
*, NFSX_UNSIGNED
);
2612 nd
->nd_duration
= fxdr_unsigned(int, *tl
);
2614 nd
->nd_duration
= NQ_MINLEASE
;
2616 nd
->nd_duration
= NQ_MINLEASE
;
2625 * Search for a sleeping nfsd and wake it up.
2626 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2627 * running nfsds will go look for the work in the nfssvc_sock list.
2631 struct nfssvc_sock
*slp
;
2633 register struct nfsd
*nd
;
2635 if ((slp
->ns_flag
& SLP_VALID
) == 0)
2637 for (nd
= nfsd_head
.tqh_first
; nd
!= 0; nd
= nd
->nfsd_chain
.tqe_next
) {
2638 if (nd
->nfsd_flag
& NFSD_WAITING
) {
2639 nd
->nfsd_flag
&= ~NFSD_WAITING
;
2641 panic("nfsd wakeup");
2644 wakeup((caddr_t
)nd
);
2648 slp
->ns_flag
|= SLP_DOREC
;
2649 nfsd_head_flag
|= NFSD_CHECKSLP
;
2651 #endif /* NFS_NOSERVER */
2654 nfs_msg(p
, server
, msg
)
2661 tpr
= tprintf_open(p
);
2664 tprintf(tpr
, "nfs server %s: %s\n", server
, msg
);