/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#include <sys/time.h>
#include <kern/clock.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <sys/user.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <nfs/nfsrtt.h>
#include <nfs/nqnfs.h>
+#include <sys/kdebug.h>
+
+#define FSDBG(A, B, C, D, E) \
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+ (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+ (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+ (int)(B), (int)(C), (int)(D), (int)(E), 0)
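+
+/*
+ * Usage sketch: FSDBG_TOP(531, vp, procnum, nmp, rep) marks entry to
+ * nfs_request() and FSDBG_BOT(531, error, rep->r_xid, nmp, rep) marks
+ * its exit; arguments are truncated to 32 bits for the trace buffer.
+ */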
+
#define TRUE 1
#define FALSE 0
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;
+extern u_long nfs_xidwrap;
/*
* Defines which timer to use for the procnum.
int nfsrtton = 0;
struct nfsrtt nfsrtt;
-static int nfs_msg __P((struct proc *,char *,char *));
+static int nfs_msg __P((struct proc *, const char *, const char *, int));
+static void nfs_up(struct nfsreq *, const char *, int);
+static void nfs_down(struct nfsreq *, const char *, int);
static int nfs_rcvlock __P((struct nfsreq *));
-static void nfs_rcvunlock __P((int *flagp));
+static void nfs_rcvunlock __P((struct nfsreq *));
static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
struct mbuf **mp));
static int nfs_reconnect __P((struct nfsreq *rep));
+static void nfs_repbusy(struct nfsreq *rep);
+static struct nfsreq * nfs_repnext(struct nfsreq *rep);
+static void nfs_repdequeue(struct nfsreq *rep);
+
+/* XXX */
+boolean_t current_thread_aborted(void);
+kern_return_t thread_terminate(thread_act_t);
+
#ifndef NFS_NOSERVER
static int nfsrv_getstream __P((struct nfssvc_sock *,int));
};
#endif /* NFS_NOSERVER */
+/*
+ * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG),
+ * but some of this code may prove useful someday...
+ */
+#undef NFSDIAG
#if NFSDIAG
int nfstraceindx = 0;
struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
}
#endif /* NFSDIAG */
+
+/*
+ * attempt to bind a socket to a reserved port
+ */
+static int
+nfs_bind_resv(struct nfsmount *nmp)
+{
+ struct socket *so = nmp->nm_so;
+ struct sockaddr_in sin;
+ int error;
+ u_short tport;
+
+ if (!so)
+ return (EINVAL);
+
+ sin.sin_len = sizeof (struct sockaddr_in);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = INADDR_ANY;
+ tport = IPPORT_RESERVED - 1;
+ sin.sin_port = htons(tport);
+
+ while (((error = sobind(so, (struct sockaddr *) &sin)) == EADDRINUSE) &&
+ (--tport > IPPORT_RESERVED / 2))
+ sin.sin_port = htons(tport);
+ return (error);
+}
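+
+/*
+ * Illustrative userspace analogue (a sketch, not part of this change,
+ * assuming an already-created socket descriptor 'sock'): the same
+ * descending scan over the privileged port range, with bind(2)
+ * standing in for sobind(). Binding below IPPORT_RESERVED requires
+ * superuser, which is why the unprivileged reconnect path below hands
+ * the bind off to a kernel thread.
+ *
+ *	struct sockaddr_in sin;
+ *	u_short tport = IPPORT_RESERVED - 1;
+ *	int error;
+ *
+ *	bzero(&sin, sizeof (sin));
+ *	sin.sin_len = sizeof (sin);
+ *	sin.sin_family = AF_INET;
+ *	sin.sin_addr.s_addr = INADDR_ANY;
+ *	do {
+ *		sin.sin_port = htons(tport);
+ *		error = bind(sock, (struct sockaddr *)&sin, sizeof (sin));
+ *	} while (error && errno == EADDRINUSE &&
+ *	    --tport > IPPORT_RESERVED / 2);
+ */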
+
+/*
+ * variables for managing the nfs_bind_resv_thread
+ */
+int nfs_resv_mounts = 0;
+static int nfs_bind_resv_thread_state = 0;
+#define NFS_BIND_RESV_THREAD_STATE_INITTED 1
+#define NFS_BIND_RESV_THREAD_STATE_RUNNING 2
+static struct slock nfs_bind_resv_slock;
+struct nfs_bind_resv_request {
+ TAILQ_ENTRY(nfs_bind_resv_request) brr_chain;
+ struct nfsmount *brr_nmp;
+ int brr_error;
+};
+static TAILQ_HEAD(, nfs_bind_resv_request) nfs_bind_resv_request_queue;
+
+/*
+ * thread to handle any reserved port bind requests
+ */
+static void
+nfs_bind_resv_thread(void)
+{
+ struct nfs_bind_resv_request *brreq;
+ boolean_t funnel_state;
+
+ funnel_state = thread_funnel_set(network_flock, TRUE);
+ nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
+
+ while (nfs_resv_mounts > 0) {
+ simple_lock(&nfs_bind_resv_slock);
+ while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) {
+ TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, brr_chain);
+ simple_unlock(&nfs_bind_resv_slock);
+ brreq->brr_error = nfs_bind_resv(brreq->brr_nmp);
+ wakeup(brreq);
+ simple_lock(&nfs_bind_resv_slock);
+ }
+ simple_unlock(&nfs_bind_resv_slock);
+ (void)tsleep((caddr_t)&nfs_bind_resv_request_queue, PSOCK,
+ "nfs_bind_resv_request_queue", 0);
+ }
+
+ nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
+ (void) thread_funnel_set(network_flock, funnel_state);
+ (void) thread_terminate(current_act());
+}
+
+int
+nfs_bind_resv_thread_wake(void)
+{
+ if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING)
+ return (EIO);
+ wakeup(&nfs_bind_resv_request_queue);
+ return (0);
+}
+
+/*
+ * underprivileged procs call this to request nfs_bind_resv_thread
+ * to perform the reserved port binding for them.
+ */
+static int
+nfs_bind_resv_nopriv(struct nfsmount *nmp)
+{
+ struct nfs_bind_resv_request brreq;
+ int error;
+
+ if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) {
+ if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) {
+ simple_lock_init(&nfs_bind_resv_slock);
+ TAILQ_INIT(&nfs_bind_resv_request_queue);
+ nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED;
+ }
+ kernel_thread(kernel_task, nfs_bind_resv_thread);
+ nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING;
+ }
+
+ brreq.brr_nmp = nmp;
+ brreq.brr_error = 0;
+
+ simple_lock(&nfs_bind_resv_slock);
+ TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
+ simple_unlock(&nfs_bind_resv_slock);
+
+ error = nfs_bind_resv_thread_wake();
+ if (error) {
+ TAILQ_REMOVE(&nfs_bind_resv_request_queue, &brreq, brr_chain);
+ /* Note: we might be able to simply restart the thread */
+ return (error);
+ }
+
+ (void) tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);
+
+ return (brreq.brr_error);
+}
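+
+/*
+ * The handoff above is a small monitor: the unprivileged caller
+ * stack-allocates a request, queues it under nfs_bind_resv_slock,
+ * wakes the thread, and sleeps on the request's address;
+ * nfs_bind_resv_thread() dequeues it, performs the privileged bind
+ * via nfs_bind_resv(), stores the result in brr_error, and wakeup()s
+ * the sleeping caller.
+ */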
+
/*
* Initialize sockets and congestion for a new NFS connection.
* We do not free the sockaddr if error.
*/
int
nfs_connect(nmp, rep)
- register struct nfsmount *nmp;
+ struct nfsmount *nmp;
struct nfsreq *rep;
{
- register struct socket *so;
+ struct socket *so;
int s, error, rcvreserve, sndreserve;
struct sockaddr *saddr;
- struct sockaddr_in sin;
- u_short tport;
thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
nmp->nm_so = (struct socket *)0;
* Some servers require that the client port be a reserved port number.
*/
if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
- sin.sin_len = sizeof (struct sockaddr_in);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = INADDR_ANY;
- tport = IPPORT_RESERVED - 1;
- sin.sin_port = htons(tport);
-
- while ((error = sobind(so, (struct sockaddr *) &sin) == EADDRINUSE) &&
- (--tport > IPPORT_RESERVED / 2))
- sin.sin_port = htons(tport);
- if (error) {
- goto bad;
+ struct proc *p;
+ /*
+ * sobind() requires current_proc() to have superuser privs.
+ * If this bind is part of a reconnect, and the current proc
+ * doesn't have superuser privs, we hand the sobind() off to
+ * a kernel thread to process.
+ */
+ if ((nmp->nm_state & NFSSTA_MOUNTED) &&
+ (p = current_proc()) && suser(p->p_ucred, &p->p_acflag)) {
+ /* request nfs_bind_resv_thread() to do bind */
+ error = nfs_bind_resv_nopriv(nmp);
+ } else {
+ error = nfs_bind_resv(nmp);
}
+ if (error)
+ goto bad;
}
/*
}
splx(s);
}
+ /*
+ * Always time out on receive; this allows us to reconnect the
+ * socket to deal with network changes.
+ */
+ so->so_rcv.sb_timeo = (2 * hz);
if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
- so->so_rcv.sb_timeo = (5 * hz);
so->so_snd.sb_timeo = (5 * hz);
} else {
- so->so_rcv.sb_timeo = 0;
so->so_snd.sb_timeo = 0;
}
if (nmp->nm_sotype == SOCK_DGRAM) {
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
- rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
+ (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
- rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
+ (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
} else {
if (nmp->nm_sotype != SOCK_STREAM)
panic("nfscon sotype");
int val;
bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = SOL_SOCKET;
sopt.sopt_name = SO_KEEPALIVE;
sopt.sopt_val = &val;
int val;
bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = &val;
sosetopt(so, &sopt);
}
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
- * 2;
- rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
- * 2;
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
+ (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
}
+ if (sndreserve > NFS_MAXSOCKBUF)
+ sndreserve = NFS_MAXSOCKBUF;
+ if (rcvreserve > NFS_MAXSOCKBUF)
+ rcvreserve = NFS_MAXSOCKBUF;
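+ /*
+ * e.g. with nm_readahead of 4, rcvreserve asks for room for five
+ * (nm_readahead + 1) maximum-size replies; both reservations are
+ * clamped to NFS_MAXSOCKBUF before being handed to soreserve().
+ */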
error = soreserve(so, sndreserve, rcvreserve);
if (error) {
goto bad;
nmp->nm_sdrtt[3] = 0;
nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
nmp->nm_sent = 0;
- NFSTRACE4(NFSTRC_CWND_INIT, nmp, nmp->nm_flag, nmp->nm_soflags,
- nmp->nm_cwnd);
+ FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
nmp->nm_timeouts = 0;
return (0);
while ((error = nfs_connect(nmp, rep))) {
if (error == EINTR || error == ERESTART)
return (EINTR);
+ if (error == EIO)
+ return (EIO);
+ nfs_down(rep, "can not connect", error);
+ if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
+ /* we're not yet completely mounted and */
+ /* we can't reconnect, so we fail */
+ return (error);
+ }
+ if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp)))
+ return (error);
(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
}
* Loop through outstanding request list and fix up all requests
* on old socket.
*/
- for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
+ TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
if (rp->r_nmp == nmp)
rp->r_flags |= R_MUSTRESEND;
}
struct nfsreq *rep;
{
struct sockaddr *sendnam;
- int error, soflags, flags;
+ int error, error2, soflags, flags;
int xidqueued = 0;
struct nfsreq *rp;
char savenametolog[MNAMELEN];
if (rep) {
- if (rep->r_flags & R_SOFTTERM) {
+ error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
+ if (error) {
m_freem(top);
- return (EINTR);
+ return (error);
}
if ((so = rep->r_nmp->nm_so) == NULL) {
rep->r_flags |= R_MUSTRESEND;
}
rep->r_flags &= ~R_MUSTRESEND;
soflags = rep->r_nmp->nm_soflags;
- for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next)
+ TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
if (rp == rep)
break;
if (rp)
if (error) {
if (rep) {
if (xidqueued) {
- for (rp = nfs_reqq.tqh_first; rp;
- rp = rp->r_chain.tqe_next)
+ TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
if (rp == rep && rp->r_xid == xidqueued)
break;
if (!rp)
/*
* Deal with errors for the client side.
*/
- if (rep->r_flags & R_SOFTTERM)
- error = EINTR;
- else {
+ error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
+ if (error2) {
+ error = error2;
+ } else {
rep->r_flags |= R_MUSTRESEND;
NFS_DPF(DUP,
("nfs_send RESEND error=%d\n", error));
/*
* Handle any recoverable (soft) socket errors here. (???)
*/
- if (error != EINTR && error != ERESTART &&
- error != EWOULDBLOCK && error != EPIPE)
+ if (error != EINTR && error != ERESTART && error != EIO &&
+ error != EWOULDBLOCK && error != EPIPE) {
error = 0;
+ }
}
return (error);
}
struct sockaddr *tmp_nam;
struct mbuf *mhck;
struct sockaddr_in *sin;
- int error, sotype, rcvflg;
+ int error, error2, sotype, rcvflg;
struct proc *p = current_proc(); /* XXX */
/*
* until we have an entire rpc request/reply.
*/
if (sotype != SOCK_DGRAM) {
- error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+ error = nfs_sndlock(rep);
if (error)
return (error);
tryagain:
* attempt that has essentially shut down this
* mount point.
*/
- if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
- nfs_sndunlock(&rep->r_nmp->nm_flag);
+ if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
+ nfs_sndunlock(rep);
+ if (error)
+ return (error);
return (EINTR);
}
so = rep->r_nmp->nm_so;
if (!so) {
error = nfs_reconnect(rep);
if (error) {
- nfs_sndunlock(&rep->r_nmp->nm_flag);
+ nfs_sndunlock(rep);
return (error);
}
goto tryagain;
if (error) {
if (error == EINTR || error == ERESTART ||
(error = nfs_reconnect(rep))) {
- nfs_sndunlock(&rep->r_nmp->nm_flag);
+ nfs_sndunlock(rep);
return (error);
}
goto tryagain;
}
}
- nfs_sndunlock(&rep->r_nmp->nm_flag);
+ nfs_sndunlock(rep);
if (sotype == SOCK_STREAM) {
aio.iov_base = (caddr_t) &len;
aio.iov_len = sizeof(u_long);
thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
error = soreceive(so, (struct sockaddr **)0, &auio,
(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+ thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
if (!rep->r_nmp) /* if unmounted then bailout */
goto shutout;
if (error == EWOULDBLOCK && rep) {
- if (rep->r_flags & R_SOFTTERM)
- return (EINTR);
+ error2 = nfs_sigintr(rep->r_nmp, rep, p);
+ if (error2)
+ error = error2;
}
} while (error == EWOULDBLOCK);
if (!error && auio.uio_resid > 0) {
rcvflg = 0;
error = soreceive(so, (struct sockaddr **)0,
&auio, mp, &control, &rcvflg);
+ if (control)
+ m_freem(control);
if (!rep->r_nmp) /* if unmounted then bailout */ {
thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
goto shutout;
}
- if (control)
- m_freem(control);
if (error == EWOULDBLOCK && rep) {
- if (rep->r_flags & R_SOFTTERM) {
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- return (EINTR);
+ error2 = nfs_sigintr(rep->r_nmp, rep, p);
+ if (error2) {
+ thread_funnel_switch(NETWORK_FUNNEL,
+ KERNEL_FUNNEL);
+ return (error2);
}
}
} while (error == EWOULDBLOCK ||
"receive error %d from nfs server %s\n",
error,
rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
- error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+ error = nfs_sndlock(rep);
if (!error)
error = nfs_reconnect(rep);
if (!error)
goto tryagain;
}
} else {
- if ((so = rep->r_nmp->nm_so) == NULL)
- return (EACCES);
+ /*
+ * We could have failed while rebinding the datagram socket
+ * so we need to attempt to rebind here.
+ */
+ if ((so = rep->r_nmp->nm_so) == NULL) {
+ error = nfs_sndlock(rep);
+ if (!error) {
+ error = nfs_reconnect(rep);
+ nfs_sndunlock(rep);
+ }
+ if (error)
+ return (error);
+ if (!rep->r_nmp) /* if unmounted then bailout */
+ return (ENXIO);
+ so = rep->r_nmp->nm_so;
+ }
if (so->so_state & SS_ISCONNECTED)
getnam = (struct sockaddr **)0;
else
FREE(*getnam, M_SONAME);
*aname = mhck;
}
- if (!rep->r_nmp) /* if unmounted then bailout */ {
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- goto shutout;
- }
-
- if (error == EWOULDBLOCK &&
- (rep->r_flags & R_SOFTTERM)) {
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- return (EINTR);
+ if (!rep->r_nmp) /* if unmounted then bailout */
+ goto dgramout;
+ if (error) {
+ error2 = nfs_sigintr(rep->r_nmp, rep, p);
+ if (error2) {
+ error = error2;
+ goto dgramout;
+ }
+ }
+ /* Reconnect for all errors. We may be receiving
+ * soft/hard/blocking errors because of a network
+ * change.
+ * XXX: we should rate limit or delay this
+ * to once every N attempts or something,
+ * although TCP doesn't seem to.
+ */
+ if (error) {
+ thread_funnel_switch(NETWORK_FUNNEL,
+ KERNEL_FUNNEL);
+ error2 = nfs_sndlock(rep);
+ if (!error2) {
+ error2 = nfs_reconnect(rep);
+ if (error2)
+ error = error2;
+ else if (!rep->r_nmp) /* if unmounted then bailout */
+ error = ENXIO;
+ else
+ so = rep->r_nmp->nm_so;
+ nfs_sndunlock(rep);
+ } else {
+ error = error2;
+ }
+ thread_funnel_switch(KERNEL_FUNNEL,
+ NETWORK_FUNNEL);
}
} while (error == EWOULDBLOCK);
+dgramout:
thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
len -= auio.uio_resid;
}
return (0);
if (error)
return (error);
-
- /*
- * This is being checked after nfs_receive, but
- * it doesn't hurt to check prior, since nfs_receive
- * will dereference r_nmp also. Bullet-proofing code
- * since changing funnels since the request to the
- * receive can leave us vulnerable for kernel to unmount
- * us.
- */
- if (!myrep->r_nmp) {
- NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 1);
- return (ECONNABORTED);
- }
+
/*
* If we slept after putting bits otw, then reply may have
* arrived. In which case returning is required, or we
* would hang trying to nfs_receive an already received reply.
*/
if (myrep->r_mrep != NULL) {
- nfs_rcvunlock(&nmp->nm_flag);
- NFSTRACE4(NFSTRC_RCVALREADY, myrep->r_xid, myrep,
- myrep->r_nmp, 2);
+ nfs_rcvunlock(myrep);
+ FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
return (0);
}
/*
- * Get the next Rpc reply off the socket
+ * Get the next RPC reply off the socket. Assume myrep->r_nmp
+ * is still intact thanks to the checks done in nfs_rcvlock.
*/
error = nfs_receive(myrep, &nam, &mrep);
+ if (nam)
+ m_freem(nam);
/*
- * Bailout asap if nfsmount struct gone (unmounted)
+ * Bailout asap if nfsmount struct gone (unmounted).
*/
- if (!myrep->r_nmp) {
- NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 2);
- return (ECONNABORTED);
+ if (!myrep->r_nmp || !nmp->nm_so) {
+ FSDBG(530, myrep->r_xid, myrep, nmp, -2);
+ return (ENXIO);
}
if (error) {
- NFSTRACE4(NFSTRC_RCVERR, myrep->r_xid, myrep, nmp,
- error);
- nfs_rcvunlock(&nmp->nm_flag);
+ FSDBG(530, myrep->r_xid, myrep, nmp, error);
+ nfs_rcvunlock(myrep);
+
+ /* Bailout asap if nfsmount struct gone (unmounted). */
+ if (!myrep->r_nmp || !nmp->nm_so)
+ return (ENXIO);
/*
* Ignore routing errors on connectionless protocols??
}
return (error);
}
- if (nam)
- m_freem(nam);
/*
* We assume all is fine, but if we did not have an error
* just check here and get out. (ekn)
*/
if (!mrep) {
- NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 3);
- return (ECONNABORTED); /* sounds good */
+ FSDBG(530, myrep->r_xid, myrep, nmp, -3);
+ return (ENXIO); /* sounds good */
}
/*
m_freem(mrep);
#endif
nfsmout:
- if (nmp->nm_flag & NFSMNT_RCVLOCK)
- nfs_rcvunlock(&nmp->nm_flag);
+ if (nmp->nm_state & NFSSTA_RCVLOCK)
+ nfs_rcvunlock(myrep);
if (myrep->r_flags & R_GETONEREP)
return (0); /* this path used by NQNFS */
continue;
* Loop through the request list to match up the reply
* Iff no match, just drop the datagram
*/
- for (rep = nfs_reqq.tqh_first; rep != 0;
- rep = rep->r_chain.tqe_next) {
+ TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
if (rep->r_mrep == NULL && rxid == rep->r_xid) {
/* Found it.. */
rep->r_mrep = mrep;
rep->r_md = md;
rep->r_dpos = dpos;
+ /*
+ * If we're tracking the round trip time
+ * then we update the circular log here
+ * with the stats from our current request.
+ */
if (nfsrtton) {
struct rttl *rt;
rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
- rt->tstamp = time;
+ microtime(&rt->tstamp); // XXX unused
if (rep->r_flags & R_TIMING)
rt->rtt = rep->r_rtt;
else
* Do the additive increase of
* one rpc/rtt.
*/
- NFSTRACE4(NFSTRC_CWND_REPLY, rep->r_xid, rep,
- nmp->nm_sent, nmp->nm_cwnd);
+ FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
+ nmp->nm_cwnd);
if (nmp->nm_cwnd <= nmp->nm_sent) {
nmp->nm_cwnd +=
(NFS_CWNDSCALE * NFS_CWNDSCALE +
if (nmp->nm_cwnd > NFS_MAXCWND)
nmp->nm_cwnd = NFS_MAXCWND;
}
- if (!(rep->r_flags & R_SENT))
- printf("nfs_reply: unsent xid=%x",
- rep->r_xid);
- rep->r_flags &= ~R_SENT;
- nmp->nm_sent -= NFS_CWNDSCALE;
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
/*
* Update rtt using a gain of 0.125 on the mean
* and a gain of 0.25 on the deviation.
break;
}
}
- nfs_rcvunlock(&nmp->nm_flag);
+ nfs_rcvunlock(myrep);
/*
* If not matched to a request, drop it.
* If it's mine, get out.
panic("nfs_reply: nil r_mrep");
return (0);
}
- NFSTRACE4(NFSTRC_NOTMINE, myrep->r_xid, myrep, rep,
- rep ? rep->r_xid : myrep->r_flags);
+ FSDBG(530, myrep->r_xid, myrep, rep,
+ rep ? rep->r_xid : myrep->r_flags);
if (myrep->r_flags & R_GETONEREP)
return (0); /* this path used by NQNFS */
}
* nb: always frees up mreq mbuf list
*/
int
-nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
struct vnode *vp;
struct mbuf *mrest;
int procnum;
struct mbuf **mrp;
struct mbuf **mdp;
caddr_t *dposp;
+ u_int64_t *xidp;
{
- register struct mbuf *m, *mrep;
+ register struct mbuf *m, *mrep, *m2;
register struct nfsreq *rep, *rp;
register u_long *tl;
register int i;
u_quad_t frev;
char *auth_str, *verf_str;
NFSKERBKEY_T key; /* save session key */
+ int nmsotype;
+ struct timeval now;
+
+ if (xidp)
+ *xidp = 0;
- nmp = VFSTONFS(vp->v_mount);
MALLOC_ZONE(rep, struct nfsreq *,
sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
- NFSTRACE4(NFSTRC_REQ, vp, procnum, nmp, rep);
- /*
- * make sure if we blocked above, that the file system didn't get
- * unmounted leaving nmp bogus value to trip on later and crash.
- * Note nfs_unmount will set rep->r_nmp if unmounted volume, but we
- * aren't that far yet. SO this is best we can do. I wanted to check
- * for vp->v_mount = 0 also below, but that caused reboot crash.
- * Something must think it's okay for vp-v_mount=0 during booting.
- * Thus the best I can do here is see if we still have a vnode.
- */
-
- if (vp->v_type == VBAD) {
- NFSTRACE4(NFSTRC_VBAD, 1, vp, nmp, rep);
- _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
- return (EINVAL);
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp == NULL ||
+ (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
+ (NFSSTA_FORCE|NFSSTA_TIMEO)) {
+ FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+ return (ENXIO);
}
+ nmsotype = nmp->nm_sotype;
+
+ FSDBG_TOP(531, vp, procnum, nmp, rep);
+
rep->r_nmp = nmp;
rep->r_vp = vp;
rep->r_procp = procp;
rep->r_procnum = procnum;
+ microuptime(&now);
+ rep->r_lastmsg = now.tv_sec -
+ ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
i = 0;
m = mrest;
while (m) {
* Get the RPC header with authorization.
*/
kerbauth:
+ nmp = VFSTONFS(vp->v_mount);
+ if (!nmp) {
+ FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+ FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+ return (ENXIO);
+ }
verf_str = auth_str = (char *)0;
if (nmp->nm_flag & NFSMNT_KERB) {
verf_str = nickv;
bzero((caddr_t)key, sizeof (key));
if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
&auth_len, verf_str, verf_len)) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (!nmp) {
+ FSDBG_BOT(531, 2, vp, error, rep);
+ FREE_ZONE((caddr_t)rep,
+ sizeof (struct nfsreq), M_NFSREQ);
+ m_freem(mrest);
+ return (ENXIO);
+ }
error = nfs_getauth(nmp, rep, cred, &auth_str,
&auth_len, verf_str, &verf_len, key);
+ nmp = VFSTONFS(vp->v_mount);
+ if (!error && !nmp)
+ error = ENXIO;
if (error) {
- _FREE_ZONE((caddr_t)rep,
+ FSDBG_BOT(531, 2, vp, error, rep);
+ FREE_ZONE((caddr_t)rep,
sizeof (struct nfsreq), M_NFSREQ);
m_freem(mrest);
return (error);
}
m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
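+ /*
+ * Return a 64-bit xid to the caller: the low 32 bits are the
+ * on-the-wire xid (host order) and the high 32 bits are nfs_xidwrap,
+ * a count of how many times the 32-bit xid space has wrapped.
+ */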
+ if (xidp)
+ *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);
if (auth_str)
_FREE(auth_str, M_TEMP);
/*
* For stream protocols, insert a Sun RPC Record Mark.
*/
- if (nmp->nm_sotype == SOCK_STREAM) {
+ if (nmsotype == SOCK_STREAM) {
M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
*mtod(m, u_long *) = htonl(0x80000000 |
(m->m_pkthdr.len - NFSX_UNSIGNED));
rep->r_mreq = m;
rep->r_xid = xid;
tryagain:
- if (nmp->nm_flag & NFSMNT_SOFT)
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
rep->r_retry = nmp->nm_retry;
else
rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
/* Get send time for nqnfs */
- reqtime = time.tv_sec;
+ microtime(&now);
+ reqtime = now.tv_sec;
/*
* If backing off another request or avoiding congestion, don't
* send this one now but let timer do it. If not timing a request,
* do it now.
*/
- if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+ if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
nmp->nm_sent < nmp->nm_cwnd)) {
+ int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);
+
splx(s);
- if (nmp->nm_soflags & PR_CONNREQUIRED)
- error = nfs_sndlock(&nmp->nm_flag, rep);
+ if (connrequired)
+ error = nfs_sndlock(rep);
/*
* Set the R_SENT before doing the send in case another thread
*/
if (!error) {
if ((rep->r_flags & R_MUSTRESEND) == 0) {
- NFSTRACE4(NFSTRC_CWND_REQ1, rep->r_xid, rep,
- nmp->nm_sent, nmp->nm_cwnd);
+ FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
+ nmp->nm_cwnd);
nmp->nm_sent += NFS_CWNDSCALE;
rep->r_flags |= R_SENT;
}
- m = m_copym(m, 0, M_COPYALL, M_WAIT);
- error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
- if (nmp->nm_soflags & PR_CONNREQUIRED)
- nfs_sndunlock(&nmp->nm_flag);
+ m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
+ error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
+ if (connrequired)
+ nfs_sndunlock(rep);
}
+ nmp = VFSTONFS(vp->v_mount);
if (error) {
- nmp->nm_sent -= NFS_CWNDSCALE;
+ if (nmp)
+ nmp->nm_sent -= NFS_CWNDSCALE;
rep->r_flags &= ~R_SENT;
}
} else {
/*
* RPC done, unlink the request.
*/
- s = splsoftclock();
- for (rp = nfs_reqq.tqh_first; rp;
- rp = rp->r_chain.tqe_next)
- if (rp == rep && rp->r_xid == xid)
- break;
- if (!rp)
- panic("nfs_request race, rep %x xid %x", rep, xid);
- TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
- splx(s);
+ nfs_repdequeue(rep);
+
+ nmp = VFSTONFS(vp->v_mount);
/*
* Decrement the outstanding request count.
*/
if (rep->r_flags & R_SENT) {
- NFSTRACE4(NFSTRC_CWND_REQ2, rep->r_xid, rep, nmp->nm_sent,
- nmp->nm_cwnd);
rep->r_flags &= ~R_SENT; /* paranoia */
- nmp->nm_sent -= NFS_CWNDSCALE;
+ if (nmp) {
+ FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
}
/*
* If there was a successful reply and a tprintf msg.
* tprintf a response.
*/
- if (!error && (rep->r_flags & R_TPRINTFMSG))
- nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
- "is alive again");
+ nfs_up(rep, "is alive again", error);
mrep = rep->r_mrep;
md = rep->r_md;
dpos = rep->r_dpos;
+ if (!error && !nmp)
+ error = ENXIO;
if (error) {
m_freem(rep->r_mreq);
- NFSTRACE4(NFSTRC_REQERR, error, rep->r_xid, nmp, rep);
- _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+ FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+ FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
return (error);
}
error = EACCES;
m_freem(mrep);
m_freem(rep->r_mreq);
- NFSTRACE4(NFSTRC_RPCERR, error, rep->r_xid, nmp, rep);
- _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+ FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+ FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
return (error);
}
error == NFSERR_TRYLATER) {
m_freem(mrep);
error = 0;
- waituntil = time.tv_sec + trylater_delay;
+ microuptime(&now);
+ waituntil = now.tv_sec + trylater_delay;
NFS_DPF(DUP,
("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
nmp->nm_mountp->mnt_stat.f_mntfromname,
nmp->nm_flag, trylater_cnt, waituntil,
trylater_delay));
- while (time.tv_sec < waituntil)
+ while (now.tv_sec < waituntil) {
(void)tsleep((caddr_t)&lbolt,
PSOCK, "nqnfstry", 0);
- trylater_delay *= nfs_backoff[trylater_cnt];
+ microuptime(&now);
+ }
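+ /* exponential backoff: double the retry delay, capped at 60 seconds */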
+ trylater_delay *= 2;
+ if (trylater_delay > 60)
+ trylater_delay = 60;
if (trylater_cnt < 7)
trylater_cnt++;
goto tryagain;
} else
m_freem(mrep);
m_freem(rep->r_mreq);
- NFSTRACE4(NFSTRC_DISSECTERR, error, rep->r_xid, nmp,
- rep);
- _FREE_ZONE((caddr_t)rep,
+ FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+ FREE_ZONE((caddr_t)rep,
sizeof (struct nfsreq), M_NFSREQ);
return (error);
}
nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
cachable = fxdr_unsigned(int, *tl++);
reqtime += fxdr_unsigned(int, *tl++);
- if (reqtime > time.tv_sec) {
+ microtime(&now);
+ if (reqtime > now.tv_sec) {
fxdr_hyper(tl, &frev);
nqnfs_clientlease(nmp, np, nqlflag,
cachable, reqtime, frev);
*mdp = md;
*dposp = dpos;
m_freem(rep->r_mreq);
- NFSTRACE4(NFSTRC_REQFREE, 0xf0f0f0f0, rep->r_xid, nmp, rep);
+ FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
return (0);
}
error = EPROTONOSUPPORT;
nfsmout:
m_freem(rep->r_mreq);
- NFSTRACE4(NFSTRC_REQFREE, error, rep->r_xid, nmp, rep);
- _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
+ FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
+ FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
return (error);
}
static void
nfs_softterm(struct nfsreq *rep)
{
+
rep->r_flags |= R_SOFTTERM;
if (rep->r_flags & R_SENT) {
- NFSTRACE4(NFSTRC_CWND_SOFT, rep->r_xid, rep,
- rep->r_nmp->nm_sent, rep->r_nmp->nm_cwnd);
+ FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
+ rep->r_nmp->nm_cwnd);
rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
rep->r_flags &= ~R_SENT;
}
}
+/*
+ * Ensure rep isn't in use by the timer, then dequeue it.
+ */
+void
+nfs_repdequeue(struct nfsreq *rep)
+{
+ int s;
+
+ while ((rep->r_flags & R_BUSY)) {
+ rep->r_flags |= R_WAITING;
+ tsleep(rep, PSOCK, "repdeq", 0);
+ }
+ s = splsoftclock();
+ TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
+ splx(s);
+}
+
+/*
+ * Busy (lock) an nfsreq, used by the nfs timer to make sure it's not
+ * free()'d out from under it.
+ */
+void
+nfs_repbusy(struct nfsreq *rep)
+{
+
+ if ((rep->r_flags & R_BUSY))
+ panic("rep locked");
+ rep->r_flags |= R_BUSY;
+}
+
+/*
+ * Unbusy the nfsreq passed in and return the next nfsreq in the chain, busied.
+ */
+struct nfsreq *
+nfs_repnext(struct nfsreq *rep)
+{
+ struct nfsreq * nextrep;
+
+ if (rep == NULL)
+ return (NULL);
+ /*
+ * We need to get and busy the next req before signalling the
+ * current one, otherwise wakeup() may block us and we'll race to
+ * grab the next req.
+ */
+ nextrep = TAILQ_NEXT(rep, r_chain);
+ if (nextrep != NULL)
+ nfs_repbusy(nextrep);
+ /* unbusy and signal. */
+ rep->r_flags &= ~R_BUSY;
+ if ((rep->r_flags & R_WAITING)) {
+ rep->r_flags &= ~R_WAITING;
+ wakeup(rep);
+ }
+ return (nextrep);
+}
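+
+/*
+ * Typical traversal, as used by nfs_timer():
+ *
+ *	rep = TAILQ_FIRST(&nfs_reqq);
+ *	if (rep != NULL)
+ *		nfs_repbusy(rep);
+ *	for ( ; rep != NULL ; rep = nfs_repnext(rep))
+ *		... rep cannot be freed out from under us here ...
+ */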
+
/*
* Nfs timer routine
* Scan the nfsreq list and retransmit any requests that have timed out
nfs_timer(arg)
void *arg; /* never used */
{
- register struct nfsreq *rep, *rp;
+ register struct nfsreq *rep;
register struct mbuf *m;
register struct socket *so;
register struct nfsmount *nmp;
#endif
int flags, rexmit, cwnd, sent;
u_long xid;
+ struct timeval now;
s = splnet();
/*
* XXX If preemptable threads are implemented the spls used for the
* outstanding request queue must be replaced with mutexes.
*/
-rescan:
#ifdef NFSTRACESUSPENDERS
if (NFSTRACE_SUSPENDING) {
- for (rep = nfs_reqq.tqh_first; rep != 0;
- rep = rep->r_chain.tqe_next)
+ TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
if (rep->r_xid == nfstracexid)
break;
if (!rep) {
}
}
#endif
- for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
+ rep = TAILQ_FIRST(&nfs_reqq);
+ if (rep != NULL)
+ nfs_repbusy(rep);
+ microuptime(&now);
+ for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
#ifdef NFSTRACESUSPENDERS
if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
nfstracexid = rep->r_xid;
continue;
if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
continue;
- if (nfs_sigintr(nmp, rep, rep->r_procp)) {
- nfs_softterm(rep);
+ if (nfs_sigintr(nmp, rep, rep->r_procp))
continue;
+ if (nmp->nm_tprintf_initial_delay != 0 &&
+ (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
+ rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
+ rep->r_lastmsg = now.tv_sec;
+ nfs_down(rep, "not responding", 0);
+ if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
+ /* we're not yet completely mounted and */
+ /* we can't complete an RPC, so we fail */
+ nfsstats.rpctimeouts++;
+ nfs_softterm(rep);
+ continue;
+ }
}
if (rep->r_rtt >= 0) {
rep->r_rtt++;
nmp->nm_timeouts++;
}
/*
- * Check for server not responding
+ * Check for too many retransmits. This is never true for
+ * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
+ * and never allow r_rexmit to be more than NFS_MAXREXMIT.
*/
- if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
- rep->r_rexmit > nmp->nm_deadthresh) {
- nfs_msg(rep->r_procp,
- nmp->nm_mountp->mnt_stat.f_mntfromname,
- "not responding");
- rep->r_flags |= R_TPRINTFMSG;
- }
if (rep->r_rexmit >= rep->r_retry) { /* too many */
nfsstats.rpctimeouts++;
nfs_softterm(rep);
rep->r_flags |= R_SENT;
nmp->nm_sent += NFS_CWNDSCALE;
}
- NFSTRACE4(NFSTRC_CWND_TIMER, xid, rep,
- nmp->nm_sent, nmp->nm_cwnd);
+ FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- NFSTRACE4(NFSTRC_CWND_TIMER, xid, error, sent, cwnd);
- /*
- * This is to fix "nfs_sigintr" DSI panics.
- * We may have slept during the send so the current
- * place in the request queue may have been released.
- * Due to zone_gc it may even be part of an
- * unrelated newly allocated data structure.
- * Restart the list scan from the top if needed...
- */
- for (rp = nfs_reqq.tqh_first; rp;
- rp = rp->r_chain.tqe_next)
- if (rp == rep && rp->r_xid == xid)
- break;
- if (!rp) {
- if (!error)
- goto rescan;
- panic("nfs_timer: race error %d xid 0x%x\n",
- error, xid);
- }
+ FSDBG(535, xid, error, sent, cwnd);
if (error) {
if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
so->so_error = 0;
- rep->r_flags = flags;
+ rep->r_flags = flags | R_RESENDERR;
rep->r_rexmit = rexmit;
nmp->nm_cwnd = cwnd;
nmp->nm_sent = sent;
/*
* Call the nqnfs server timer once a second to handle leases.
*/
- if (lasttime != time.tv_sec) {
- lasttime = time.tv_sec;
+ microuptime(&now);
+ if (lasttime != now.tv_sec) {
+ lasttime = now.tv_sec;
nqnfs_serverd();
}
* Scan the write gathering queues for writes that need to be
* completed now.
*/
- cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
- for (slp = nfssvc_sockhead.tqh_first; slp != 0;
- slp = slp->ns_chain.tqe_next) {
- if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
+ cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
+ TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
+ if (LIST_FIRST(&slp->ns_tq) &&
+ LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
nfsrv_wakenfsd(slp);
}
#endif /* NFS_NOSERVER */
/*
* Test for a termination condition pending on the process.
- * This is used for NFSMNT_INT mounts.
+ * This is used to determine if we need to bail on a mount.
+ * EIO is returned if there has been a soft timeout.
+ * EINTR is returned if there is a signal pending that is not being ignored
+ * and the mount is interruptible, or if we are a thread that is in the process
+ * of cancellation (also SIGKILL posted).
*/
int
nfs_sigintr(nmp, rep, p)
struct nfsmount *nmp;
struct nfsreq *rep;
- register struct proc *p;
+ struct proc *p;
{
+ struct uthread *curr_td;
+ sigset_t pending_sigs;
+ int context_good = 0;
+ struct nfsmount *repnmp;
+
+ if (nmp == NULL)
+ return (ENXIO);
+ if (rep != NULL) {
+ repnmp = rep->r_nmp;
+ /* we've had a forced unmount. */
+ if (repnmp == NULL)
+ return (ENXIO);
+ /* request has timed out on a 'soft' mount. */
+ if (rep->r_flags & R_SOFTTERM)
+ return (EIO);
+ /*
+ * We're in the process of a forced unmount and there's
+ * been a timeout; we're dead and fail I/O.
+ */
+ if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
+ (NFSSTA_FORCE|NFSSTA_TIMEO))
+ return (EIO);
+ /* Someone is unmounting us, go soft and mark it. */
+ if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
+ repnmp->nm_flag |= NFSMNT_SOFT;
+ nmp->nm_state |= NFSSTA_FORCE;
+ }
+ /*
+ * If the mount is hung and we've requested not to hang
+ * on remote filesystems, then bail now.
+ */
+ if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
+ (repnmp->nm_state & NFSSTA_TIMEO) != 0)
+ return (EIO);
+ }
+ /* XXX: is this valid? This probably should be an assertion. */
+ if (p == NULL)
+ return (0);
- if (rep && (rep->r_flags & R_SOFTTERM))
+ /*
+ * XXX: Since nfs doesn't have a good shot at getting the current
+ * thread we take a guess. (only struct proc * are passed to VOPs)
+ * What we do is look at the current thread, if it belongs to the
+ * passed in proc pointer then we have a "good/accurate" context
+ * and can make an accurate guess as to what to do.
+ * However, if we have a bad context we have to make do with what
+ * is in the proc struct which may not be as up to date as we'd
+ * like.
+ * This is ok because the process will call us with the correct
+ * context after a short timeout while waiting for a response.
+ */
+ curr_td = (struct uthread *)get_bsdthread_info(current_act());
+ if (curr_td->uu_proc == p)
+ context_good = 1;
+ if (context_good && current_thread_aborted())
return (EINTR);
- if (!(nmp->nm_flag & NFSMNT_INT))
- return (0);
- if (p && p->p_siglist &&
- (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
- NFSINT_SIGMASK))
+ /* mask off thread and process blocked signals. */
+ if (context_good)
+ pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
+ else
+ pending_sigs = p->p_siglist;
+ /* mask off process level and NFS ignored signals. */
+ pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
+ if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
return (EINTR);
return (0);
}
* in progress when a reconnect is necessary.
*/
int
-nfs_sndlock(flagp, rep)
- register int *flagp;
+nfs_sndlock(rep)
struct nfsreq *rep;
{
+ register int *statep;
struct proc *p;
- int slpflag = 0, slptimeo = 0;
+ int error, slpflag = 0, slptimeo = 0;
- if (rep) {
- p = rep->r_procp;
- if (rep->r_nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- } else
- p = (struct proc *)0;
- while (*flagp & NFSMNT_SNDLOCK) {
- if (nfs_sigintr(rep->r_nmp, rep, p))
- return (EINTR);
- *flagp |= NFSMNT_WANTSND;
- (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
- slptimeo);
+ if (rep->r_nmp == NULL)
+ return (ENXIO);
+ statep = &rep->r_nmp->nm_state;
+
+ p = rep->r_procp;
+ if (rep->r_nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ while (*statep & NFSSTA_SNDLOCK) {
+ error = nfs_sigintr(rep->r_nmp, rep, p);
+ if (error)
+ return (error);
+ *statep |= NFSSTA_WANTSND;
+ if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
+ slptimeo = hz;
+ (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
+ "nfsndlck", slptimeo);
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
}
+ /*
+ * Make sure while we slept that the mountpoint didn't go away.
+ * nfs_sigintr and callers expect it intact.
+ */
+ if (!rep->r_nmp)
+ return (ENXIO); /* don't have lock until out of loop */
}
- *flagp |= NFSMNT_SNDLOCK;
+ *statep |= NFSSTA_SNDLOCK;
return (0);
}
* Unlock the stream socket for others.
*/
void
-nfs_sndunlock(flagp)
- register int *flagp;
+nfs_sndunlock(rep)
+ struct nfsreq *rep;
{
+ register int *statep;
- if ((*flagp & NFSMNT_SNDLOCK) == 0)
+ if (rep->r_nmp == NULL)
+ return;
+ statep = &rep->r_nmp->nm_state;
+ if ((*statep & NFSSTA_SNDLOCK) == 0)
panic("nfs sndunlock");
- *flagp &= ~NFSMNT_SNDLOCK;
- if (*flagp & NFSMNT_WANTSND) {
- *flagp &= ~NFSMNT_WANTSND;
- wakeup((caddr_t)flagp);
+ *statep &= ~NFSSTA_SNDLOCK;
+ if (*statep & NFSSTA_WANTSND) {
+ *statep &= ~NFSSTA_WANTSND;
+ wakeup((caddr_t)statep);
}
}
nfs_rcvlock(rep)
register struct nfsreq *rep;
{
- register int *flagp = &rep->r_nmp->nm_flag;
- int slpflag, slptimeo = 0;
+ register int *statep;
+ int error, slpflag, slptimeo = 0;
+
+ /* make sure we still have our mountpoint */
+ if (!rep->r_nmp) {
+ if (rep->r_mrep != NULL)
+ return (EALREADY);
+ return (ENXIO);
+ }
- if (*flagp & NFSMNT_INT)
+ statep = &rep->r_nmp->nm_state;
+ FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
+ if (rep->r_nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
else
slpflag = 0;
- while (*flagp & NFSMNT_RCVLOCK) {
- if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
- NFSTRACE4(NFSTRC_RCVLCKINTR, rep->r_xid, rep,
- rep->r_nmp, *flagp);
- return (EINTR);
+ while (*statep & NFSSTA_RCVLOCK) {
+ if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
+ FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
+ return (error);
} else if (rep->r_mrep != NULL) {
/*
* Don't bother sleeping if reply already arrived
*/
- NFSTRACE4(NFSTRC_RCVALREADY, rep->r_xid, rep,
- rep->r_nmp, 1);
+ FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
return (EALREADY);
}
- NFSTRACE4(NFSTRC_RCVLCKW, rep->r_xid, rep, rep->r_nmp, *flagp);
- *flagp |= NFSMNT_WANTRCV;
- (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
- slptimeo);
+ FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
+ *statep |= NFSSTA_WANTRCV;
+ /*
+ * We need to poll if we're P_NOREMOTEHANG so that we
+ * call nfs_sigintr periodically above.
+ */
+ if (rep->r_procp != NULL &&
+ (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
+ slptimeo = hz;
+ (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
+ "nfsrcvlk", slptimeo);
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
}
+ /*
+ * Make sure while we slept that the mountpoint didn't go away.
+ * nfs_sigintr and caller nfs_reply expect it intact.
+ */
+ if (!rep->r_nmp) {
+ FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
+ return (ENXIO); /* don't have lock until out of loop */
+ }
}
/*
* nfs_reply will handle it if reply already arrived.
* (We may have slept or been preempted while on network funnel).
*/
- NFSTRACE4(NFSTRC_RCVLCK, rep->r_xid, rep, rep->r_nmp, *flagp);
- *flagp |= NFSMNT_RCVLOCK;
+ FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
+ *statep |= NFSSTA_RCVLOCK;
return (0);
}
* Unlock the stream socket for others.
*/
static void
-nfs_rcvunlock(flagp)
- register int *flagp;
+nfs_rcvunlock(rep)
+ register struct nfsreq *rep;
{
+ register int *statep;
+
+ if (rep->r_nmp == NULL)
+ return;
+ statep = &rep->r_nmp->nm_state;
- if ((*flagp & NFSMNT_RCVLOCK) == 0)
+ FSDBG(533, statep, *statep, 0, 0);
+ if ((*statep & NFSSTA_RCVLOCK) == 0)
panic("nfs rcvunlock");
- *flagp &= ~NFSMNT_RCVLOCK;
- if (*flagp & NFSMNT_WANTRCV) {
- NFSTRACE(NFSTRC_RCVUNLW, flagp);
- *flagp &= ~NFSMNT_WANTRCV;
- wakeup((caddr_t)flagp);
- } else {
- NFSTRACE(NFSTRC_RCVUNL, flagp);
+ *statep &= ~NFSSTA_RCVLOCK;
+ if (*statep & NFSSTA_WANTRCV) {
+ *statep &= ~NFSSTA_WANTRCV;
+ wakeup((caddr_t)statep);
}
}
* be called with M_WAIT from an nfsd.
*/
/*
- * Needs to eun under network funnel
+ * Needs to run under network funnel
*/
void
nfsrv_rcv(so, arg, waitflag)
register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
register struct mbuf *m;
struct mbuf *mp, *mhck;
- struct sockaddr *nam=0;
+ struct sockaddr *nam;
struct uio auio;
- int flags, error;
+ int flags, ns_nflag=0, error;
struct sockaddr_in *sin;
if ((slp->ns_flag & SLP_VALID) == 0)
* Define this to test for nfsds handling this under heavy load.
*/
if (waitflag == M_DONTWAIT) {
- slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ ns_nflag = SLPN_NEEDQ;
+ goto dorecs;
}
#endif
auio.uio_procp = NULL;
* the nfs servers are heavily loaded.
*/
if (slp->ns_rec && waitflag == M_DONTWAIT) {
- slp->ns_flag |= SLP_NEEDQ;
+ ns_nflag = SLPN_NEEDQ;
goto dorecs;
}
error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
if (error || mp == (struct mbuf *)0) {
if (error == EWOULDBLOCK)
- slp->ns_flag |= SLP_NEEDQ;
+ ns_nflag = SLPN_NEEDQ;
else
- slp->ns_flag |= SLP_DISCONN;
+ ns_nflag = SLPN_DISCONN;
goto dorecs;
}
m = mp;
error = nfsrv_getstream(slp, waitflag);
if (error) {
if (error == EPERM)
- slp->ns_flag |= SLP_DISCONN;
+ ns_nflag = SLPN_DISCONN;
else
- slp->ns_flag |= SLP_NEEDQ;
+ ns_nflag = SLPN_NEEDQ;
}
} else {
do {
auio.uio_resid = 1000000000;
- flags = MSG_DONTWAIT;
+ flags = MSG_DONTWAIT | MSG_NEEDSA;
nam = 0;
+ mp = 0;
error = soreceive(so, &nam, &auio, &mp,
(struct mbuf **)0, &flags);
sin = mtod(mhck, struct sockaddr_in *);
bcopy(nam, sin, sizeof(struct sockaddr_in));
mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
- FREE(nam, M_SONAME);
m = mhck;
m->m_next = mp;
slp->ns_recend = m;
m->m_nextpkt = (struct mbuf *)0;
}
+ if (nam) {
+ FREE(nam, M_SONAME);
+ }
if (error) {
if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
&& error != EWOULDBLOCK) {
- slp->ns_flag |= SLP_DISCONN;
+ ns_nflag = SLPN_DISCONN;
goto dorecs;
}
}
* Now try and process the request records, non-blocking.
*/
dorecs:
+ if (ns_nflag)
+ slp->ns_nflag |= ns_nflag;
if (waitflag == M_DONTWAIT &&
- (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) {
+ (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
nfsrv_wakenfsd(slp);
thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
register struct mbuf *m, **mpp;
register char *cp1, *cp2;
register int len;
- struct mbuf *om, *m2, *recm = 0;
+ struct mbuf *om, *m2, *recm;
u_long recmark;
- if (slp->ns_flag & SLP_GETSTREAM)
+ if (slp->ns_nflag & SLPN_GETSTREAM)
panic("nfs getstream");
- slp->ns_flag |= SLP_GETSTREAM;
+ slp->ns_nflag |= SLPN_GETSTREAM;
for (;;) {
if (slp->ns_reclen == 0) {
if (slp->ns_cc < NFSX_UNSIGNED) {
- slp->ns_flag &= ~SLP_GETSTREAM;
+ slp->ns_nflag &= ~SLPN_GETSTREAM;
return (0);
}
m = slp->ns_raw;
recmark = ntohl(recmark);
slp->ns_reclen = recmark & ~0x80000000;
if (recmark & 0x80000000)
- slp->ns_flag |= SLP_LASTFRAG;
+ slp->ns_nflag |= SLPN_LASTFRAG;
else
- slp->ns_flag &= ~SLP_LASTFRAG;
+ slp->ns_nflag &= ~SLPN_LASTFRAG;
if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
- slp->ns_flag &= ~SLP_GETSTREAM;
+ slp->ns_nflag &= ~SLPN_GETSTREAM;
return (EPERM);
}
}
/*
* Now get the record part.
+ *
+ * Note that slp->ns_reclen may be 0. Linux sometimes
+ * generates 0-length RPCs.
*/
+ recm = NULL;
if (slp->ns_cc == slp->ns_reclen) {
recm = slp->ns_raw;
slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
m->m_len -= slp->ns_reclen - len;
len = slp->ns_reclen;
} else {
- slp->ns_flag &= ~SLP_GETSTREAM;
+ slp->ns_nflag &= ~SLPN_GETSTREAM;
return (EWOULDBLOCK);
}
} else if ((len + m->m_len) == slp->ns_reclen) {
slp->ns_cc -= len;
slp->ns_reclen = 0;
} else {
- slp->ns_flag &= ~SLP_GETSTREAM;
+ slp->ns_nflag &= ~SLPN_GETSTREAM;
return (0);
}
while (*mpp)
mpp = &((*mpp)->m_next);
*mpp = recm;
- if (slp->ns_flag & SLP_LASTFRAG) {
+ if (slp->ns_nflag & SLPN_LASTFRAG) {
if (slp->ns_recend)
slp->ns_recend->m_nextpkt = slp->ns_frag;
else
nd->nd_dpos = mtod(m, caddr_t);
error = nfs_getreq(nd, nfsd, TRUE);
if (error) {
- m_freem(nam);
- _FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
+ if (nam)
+ m_freem(nam);
+ FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
return (error);
}
*ndp = nd;
int error = 0, nqnfs = 0, ticklen;
struct mbuf *mrep, *md;
register struct nfsuid *nuidp;
- struct timeval tvin, tvout;
+ struct timeval tvin, tvout, now;
#if 0 /* until encrypted keys are implemented */
NFSKERBKEYSCHED_T keys; /* stores key schedule */
#endif
tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
- if (nuidp->nu_expire < time.tv_sec ||
+ microtime(&now);
+ if (nuidp->nu_expire < now.tv_sec ||
nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
(nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
if ((slp->ns_flag & SLP_VALID) == 0)
return;
- for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
+ TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
if (nd->nfsd_flag & NFSD_WAITING) {
nd->nfsd_flag &= ~NFSD_WAITING;
if (nd->nfsd_slp)
#endif /* NFS_NOSERVER */
static int
-nfs_msg(p, server, msg)
+nfs_msg(p, server, msg, error)
struct proc *p;
- char *server, *msg;
+ const char *server, *msg;
+ int error;
{
tpr_t tpr;
tpr = tprintf_open(p);
else
tpr = NULL;
- tprintf(tpr, "nfs server %s: %s\n", server, msg);
+ if (error)
+ tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
+ error);
+ else
+ tprintf(tpr, "nfs server %s: %s\n", server, msg);
tprintf_close(tpr);
return (0);
}
+
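+/*
+ * nfs_down logs the given message and, on the first timeout, posts a
+ * VQ_NOTRESP vfs event and sets NFSSTA_TIMEO; nfs_up reverses this
+ * once the server responds, logging only if a "not responding"
+ * message had been printed (R_TPRINTFMSG).
+ */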
+static void
+nfs_down(rep, msg, error)
+ struct nfsreq *rep;
+ const char *msg;
+ int error;
+{
+ int dosignal;
+
+ if (rep == NULL || rep->r_nmp == NULL)
+ return;
+ if (!(rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
+ vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
+ VQ_NOTRESP, 0);
+ rep->r_nmp->nm_state |= NFSSTA_TIMEO;
+ }
+ rep->r_flags |= R_TPRINTFMSG;
+ nfs_msg(rep->r_procp, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
+ msg, error);
+}
+
+static void
+nfs_up(rep, msg, error)
+ struct nfsreq *rep;
+ const char *msg;
+ int error;
+{
+
+ if (error != 0 || rep == NULL || rep->r_nmp == NULL)
+ return;
+ if ((rep->r_flags & R_TPRINTFMSG) != 0)
+ nfs_msg(rep->r_procp,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+ if ((rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
+ rep->r_nmp->nm_state &= ~NFSSTA_TIMEO;
+ vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
+ VQ_NOTRESP, 1);
+ }
+}