bsd/nfs/nfs_socket.c

   1 /*
   2  * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  26 /*
  27  * Copyright (c) 1989, 1991, 1993, 1995
  28  *      The Regents of the University of California.  All rights reserved.
  29  *
  30  * This code is derived from software contributed to Berkeley by
  31  * Rick Macklem at The University of Guelph.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. All advertising materials mentioning features or use of this software
  42  *    must display the following acknowledgement:
  43  *      This product includes software developed by the University of
  44  *      California, Berkeley and its contributors.
  45  * 4. Neither the name of the University nor the names of its contributors
  46  *    may be used to endorse or promote products derived from this software
  47  *    without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  59  * SUCH DAMAGE.
  60  *
  61  *      @(#)nfs_socket.c        8.5 (Berkeley) 3/30/95
  62  * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
  63  */
  64
  65 /*
  66  * Socket operations for use by nfs
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/proc.h>
  72 #include <sys/mount.h>
  73 #include <sys/kernel.h>
  74 #include <sys/mbuf.h>
  75 #include <sys/malloc.h>
  76 #include <sys/vnode.h>
  77 #include <sys/domain.h>
  78 #include <sys/protosw.h>
  79 #include <sys/socket.h>
  80 #include <sys/socketvar.h>
  81 #include <sys/syslog.h>
  82 #include <sys/tprintf.h>
  83 #include <machine/spl.h>
  84
  85 #include <sys/time.h>
  86 #include <kern/clock.h>
  87 #include <sys/user.h>
  88
  89 #include <netinet/in.h>
  90 #include <netinet/tcp.h>
  91
  92 #include <nfs/rpcv2.h>
  93 #include <nfs/nfsproto.h>
  94 #include <nfs/nfs.h>
  95 #include <nfs/xdr_subs.h>
  96 #include <nfs/nfsm_subs.h>
  97 #include <nfs/nfsmount.h>
  98 #include <nfs/nfsnode.h>
  99 #include <nfs/nfsrtt.h>
 100 #include <nfs/nqnfs.h>
 101
 102 #include <sys/kdebug.h>
 103
 104 #define FSDBG(A, B, C, D, E) \
 105         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
 106                 (int)(B), (int)(C), (int)(D), (int)(E), 0)
 107 #define FSDBG_TOP(A, B, C, D, E) \
 108         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
 109                 (int)(B), (int)(C), (int)(D), (int)(E), 0)
 110 #define FSDBG_BOT(A, B, C, D, E) \
 111         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
 112                 (int)(B), (int)(C), (int)(D), (int)(E), 0)
 113
 114 #define TRUE    1
 115 #define FALSE   0
 116
 117 /*
 118  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
 119  * Use the mean and mean deviation of rtt for the appropriate type of rpc
 120  * for the frequent rpcs and a default for the others.
 121  * The justification for doing "other" this way is that these rpcs
 122  * happen so infrequently that timer est. would probably be stale.
 123  * Also, since many of these rpcs are
 124  * non-idempotent, a conservative timeout is desired.
 125  * getattr, lookup - A+2D
 126  * read, write     - A+4D
 127  * other           - nm_timeo
 128  */
 129 #define NFS_RTO(n, t) \
 130         ((t) == 0 ? (n)->nm_timeo : \
 131          ((t) < 3 ? \
 132           (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
 133           ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
 134 #define NFS_SRTT(r)     (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
 135 #define NFS_SDRTT(r)    (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
 136 /*
 137  * External data, mostly RPC constants in XDR form
 138  */
 139 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
 140         rpc_msgaccepted, rpc_call, rpc_autherr,
 141         rpc_auth_kerb;
 142 extern u_long nfs_prog, nqnfs_prog;
 143 extern time_t nqnfsstarttime;
 144 extern struct nfsstats nfsstats;
 145 extern int nfsv3_procid[NFS_NPROCS];
 146 extern int nfs_ticks;
 147 extern u_long nfs_xidwrap;
 148
 149 /*
 150  * Defines which timer to use for the procnum.
 151  * 0 - default
 152  * 1 - getattr
 153  * 2 - lookup
 154  * 3 - read
 155  * 4 - write
 156  */
 157 static int proct[NFS_NPROCS] = {
 158         0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
 159         0, 0, 0,
 160 };
 161
 162 /*
 163  * There is a congestion window for outstanding rpcs maintained per mount
 164  * point. The cwnd size is adjusted in roughly the way that:
 165  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 166  * SIGCOMM '88". ACM, August 1988.
 167  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 168  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 169  * of rpcs is in progress.
 170  * (The sent count and cwnd are scaled for integer arith.)
 171  * Variants of "slow start" were tried and were found to be too much of a
 172  * performance hit (ave. rtt 3 times larger),
 173  * I suspect due to the large rtt that nfs rpcs have.
 174  */
 175 #define NFS_CWNDSCALE   256
 176 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
 177 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 178 int nfsrtton = 0;
 179 struct nfsrtt nfsrtt;
 180
 181 static int      nfs_msg __P((struct proc *, const char *, const char *, int));
 182 static void     nfs_up(struct nfsreq *, const char *, int);
 183 static void     nfs_down(struct nfsreq *, const char *, int);
 184 static int      nfs_rcvlock __P((struct nfsreq *));
 185 static void     nfs_rcvunlock __P((struct nfsreq *));
 186 static int      nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
 187                                  struct mbuf **mp));
 188 static int      nfs_reconnect __P((struct nfsreq *rep));
 189 static void     nfs_repbusy(struct nfsreq *rep);
 190 static struct nfsreq *  nfs_repnext(struct nfsreq *rep);
 191 static void     nfs_repdequeue(struct nfsreq *rep);
 192 #ifndef NFS_NOSERVER
 193 static int      nfsrv_getstream __P((struct nfssvc_sock *,int));
 194
 195 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
 196                                     struct nfssvc_sock *slp,
 197                                     struct proc *procp,
 198                                     struct mbuf **mreqp)) = {
 199         nfsrv_null,
 200         nfsrv_getattr,
 201         nfsrv_setattr,
 202         nfsrv_lookup,
 203         nfsrv3_access,
 204         nfsrv_readlink,
 205         nfsrv_read,
 206         nfsrv_write,
 207         nfsrv_create,
 208         nfsrv_mkdir,
 209         nfsrv_symlink,
 210         nfsrv_mknod,
 211         nfsrv_remove,
 212         nfsrv_rmdir,
 213         nfsrv_rename,
 214         nfsrv_link,
 215         nfsrv_readdir,
 216         nfsrv_readdirplus,
 217         nfsrv_statfs,
 218         nfsrv_fsinfo,
 219         nfsrv_pathconf,
 220         nfsrv_commit,
 221         nqnfsrv_getlease,
 222         nqnfsrv_vacated,
 223         nfsrv_noop,
 224         nfsrv_noop
 225 };
 226 #endif /* NFS_NOSERVER */
 227
 228 /*
 229  * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
 230  * But some of this code may prove useful someday...
 231  */
 232 #undef NFSDIAG
 233 #if NFSDIAG
 234 int nfstraceindx = 0;
 235 struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
 236
 237 #define NFSTRACESUSPENDERS
 238 #ifdef NFSTRACESUSPENDERS
 239 uint nfstracemask = 0xfff00200;
 240 int nfstracexid = -1;
 241 uint onfstracemask = 0;
 242 int nfstracesuspend = -1;
 243 #define NFSTRACE_SUSPEND                                        \
 244         {                                                       \
 245         if (nfstracemask) {                                     \
 246                 onfstracemask = nfstracemask;                   \
 247                 nfstracemask = 0;                               \
 248         }                                                       \
 249         }
 250 #define NFSTRACE_RESUME                                         \
 251         {                                                       \
 252         nfstracesuspend = -1;                                   \
 253         if (!nfstracemask)                                      \
 254                 nfstracemask = onfstracemask;                   \
 255         }
 256 #define NFSTRACE_STARTSUSPENDCOUNTDOWN                          \
 257         {                                                       \
 258         nfstracesuspend = (nfstraceindx+100) % NFSTBUFSIZ;      \
 259         }
 260 #define NFSTRACE_SUSPENDING (nfstracesuspend != -1)
 261 #define NFSTRACE_SUSPENSEOVER                                   \
 262         (nfstracesuspend > 100 ?                                \
 263                 (nfstraceindx >= nfstracesuspend ||             \
 264                  nfstraceindx < nfstracesuspend - 100) :        \
 265                 (nfstraceindx >= nfstracesuspend &&             \
 266                  nfstraceindx < nfstracesuspend + 8192 - 100))
 267 #else
 268 uint nfstracemask = 0;
 269 #endif  /* NFSTRACESUSPENDERS */
 270
 271 int nfsprnttimo = 1;
 272
 273 int nfsodata[1024];
 274 int nfsoprocnum, nfsolen;
 275 int nfsbt[32], nfsbtlen;
 276
 277 #if defined(__ppc__)
 278 int
 279 backtrace(int *where, int size)
 280 {
 281         int register sp, *fp, numsaved;
 282
 283         __asm__ volatile("mr %0,r1" : "=r" (sp));
 284
 285         fp = (int *)*((int *)sp);
 286         size /= sizeof(int);
 287         for (numsaved = 0; numsaved < size; numsaved++) {
 288                 *where++ = fp[2];
 289                 if ((int)fp <= 0)
 290                         break;
 291                 fp = (int *)*fp;
 292         }
 293         return (numsaved);
 294 }
 295 #elif defined(__i386__)
 296 int
 297 backtrace()
 298 {
 299        return (0);  /* Till someone implements a real routine */
 300 }
 301 #else
 302 #error architecture not implemented.
 303 #endif
 304
 305 void
 306 nfsdup(struct nfsreq *rep)
 307 {
 308         int *ip, i, first = 1, end;
 309         char *s, b[240];
 310         struct mbuf *mb;
 311
 312         if ((nfs_debug & NFS_DEBUG_DUP) == 0)
 313                 return;
 314         /* last mbuf in chain will be nfs content */
 315         for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next)
 316                 ;
 317         if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen &&
 318             !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) {
 319                 s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid,
 320                                 rep->r_procnum);
 321                 end = (int)(VTONFS(rep->r_vp)->n_fhp);
 322                 ip = (int *)(end & ~3);
 323                 end += VTONFS(rep->r_vp)->n_fhsize;
 324                 while ((int)ip < end) {
 325                         i = *ip++;
 326                         if (first) { /* avoid leading zeroes */
 327                                 if (i == 0)
 328                                         continue;
 329                                 first = 0;
 330                                 s += sprintf(s, "%x", i);
 331                         } else
 332                                 s += sprintf(s, "%08x", i);
 333                 }
 334                 if (first)
 335                         sprintf(s, "%x", 0);
 336                 else /* eliminate trailing zeroes */
 337                         while (*--s == '0')
 338                                 *s = 0;
 339                 /*
 340                  * set a breakpoint here and you can view the
 341                  * current backtrace and the one saved in nfsbt
 342                  */
 343                 kprintf("%s\n", b);
 344         }
 345         nfsoprocnum = rep->r_procnum;
 346         nfsolen = mb->m_len;
 347         bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len);
 348         nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt));
 349 }
 350 #endif /* NFSDIAG */
 351
 352 /*
 353  * Initialize sockets and congestion for a new NFS connection.
 354  * We do not free the sockaddr if error.
 355  */
 356 int
 357 nfs_connect(nmp, rep)
 358         register struct nfsmount *nmp;
 359         struct nfsreq *rep;
 360 {
 361         register struct socket *so;
 362         int s, error, rcvreserve, sndreserve;
 363         struct sockaddr *saddr;
 364         struct sockaddr_in sin;
 365         u_short tport;
 366
 367         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 368         nmp->nm_so = (struct socket *)0;
 369         saddr = mtod(nmp->nm_nam, struct sockaddr *);
 370         error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
 371                 nmp->nm_soproto);
 372         if (error) {
 373                 goto bad;
 374         }
 375         so = nmp->nm_so;
 376         nmp->nm_soflags = so->so_proto->pr_flags;
 377
 378         /*
 379          * Some servers require that the client port be a reserved port number.
 380          */
 381         if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
 382                 sin.sin_len = sizeof (struct sockaddr_in);
 383                 sin.sin_family = AF_INET;
 384                 sin.sin_addr.s_addr = INADDR_ANY;
 385                 tport = IPPORT_RESERVED - 1;
 386                 sin.sin_port = htons(tport);
 387
 388                 while ((error = sobind(so, (struct sockaddr *) &sin) == EADDRINUSE) &&
 389                        (--tport > IPPORT_RESERVED / 2))
 390                         sin.sin_port = htons(tport);
 391                 if (error) {
 392                         goto bad;
 393                 }
 394         }
 395
 396         /*
 397          * Protocols that do not require connections may be optionally left
 398          * unconnected for servers that reply from a port other than NFS_PORT.
 399          */
 400         if (nmp->nm_flag & NFSMNT_NOCONN) {
 401                 if (nmp->nm_soflags & PR_CONNREQUIRED) {
 402                         error = ENOTCONN;
 403                         goto bad;
 404                 }
 405         } else {
 406                 error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *));
 407                 if (error) {
 408                         goto bad;
 409                 }
 410
 411                 /*
 412                  * Wait for the connection to complete. Cribbed from the
 413                  * connect system call but with the wait timing out so
 414                  * that interruptible mounts don't hang here for a long time.
 415                  */
 416                 s = splnet();
 417                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 418                         (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
 419                                 "nfscon", 2 * hz);
 420                         if ((so->so_state & SS_ISCONNECTING) &&
 421                             so->so_error == 0 && rep &&
 422                             (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
 423                                 so->so_state &= ~SS_ISCONNECTING;
 424                                 splx(s);
 425                                 goto bad;
 426                         }
 427                 }
 428                 if (so->so_error) {
 429                         error = so->so_error;
 430                         so->so_error = 0;
 431                         splx(s);
 432                         goto bad;
 433                 }
 434                 splx(s);
 435         }
 436         /*
 437          * Always time out on recieve, this allows us to reconnect the
 438          * socket to deal with network changes.
 439          */
 440         so->so_rcv.sb_timeo = (2 * hz);
 441         if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
 442                 so->so_snd.sb_timeo = (5 * hz);
 443         } else {
 444                 so->so_snd.sb_timeo = 0;
 445         }
 446         if (nmp->nm_sotype == SOCK_DGRAM) {
 447                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
 448                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
 449                         (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 450         } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
 451                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
 452                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) *
 453                         (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 454         } else {
 455                 if (nmp->nm_sotype != SOCK_STREAM)
 456                         panic("nfscon sotype");
 457
 458                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 459                         struct sockopt sopt;
 460                         int val;
 461
 462                         bzero(&sopt, sizeof sopt);
 463                         sopt.sopt_dir = SOPT_SET;
 464                         sopt.sopt_level = SOL_SOCKET;
 465                         sopt.sopt_name = SO_KEEPALIVE;
 466                         sopt.sopt_val = &val;
 467                         sopt.sopt_valsize = sizeof val;
 468                         val = 1;
 469                         sosetopt(so, &sopt);
 470                 }
 471                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
 472                         struct sockopt sopt;
 473                         int val;
 474
 475                         bzero(&sopt, sizeof sopt);
 476                         sopt.sopt_dir = SOPT_SET;
 477                         sopt.sopt_level = IPPROTO_TCP;
 478                         sopt.sopt_name = TCP_NODELAY;
 479                         sopt.sopt_val = &val;
 480                         sopt.sopt_valsize = sizeof val;
 481                         val = 1;
 482                         sosetopt(so, &sopt);
 483                 }
 484
 485                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3;
 486                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) *
 487                                 (nmp->nm_readahead > 0 ? nmp->nm_readahead + 1 : 2);
 488         }
 489
 490         if (sndreserve > NFS_MAXSOCKBUF)
 491                 sndreserve = NFS_MAXSOCKBUF;
 492         if (rcvreserve > NFS_MAXSOCKBUF)
 493                 rcvreserve = NFS_MAXSOCKBUF;
 494         error = soreserve(so, sndreserve, rcvreserve);
 495         if (error) {
 496                 goto bad;
 497         }
 498         so->so_rcv.sb_flags |= SB_NOINTR;
 499         so->so_snd.sb_flags |= SB_NOINTR;
 500
 501         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 502
 503         /* Initialize other non-zero congestion variables */
 504         nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
 505                 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
 506         nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
 507                 nmp->nm_sdrtt[3] = 0;
 508         nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
 509         nmp->nm_sent = 0;
 510         FSDBG(529, nmp, nmp->nm_state, nmp->nm_soflags, nmp->nm_cwnd);
 511         nmp->nm_timeouts = 0;
 512         return (0);
 513
 514 bad:
 515         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 516         nfs_disconnect(nmp);
 517         return (error);
 518 }
 519
 520 /*
 521  * Reconnect routine:
 522  * Called when a connection is broken on a reliable protocol.
 523  * - clean up the old socket
 524  * - nfs_connect() again
 525  * - set R_MUSTRESEND for all outstanding requests on mount point
 526  * If this fails the mount point is DEAD!
 527  * nb: Must be called with the nfs_sndlock() set on the mount point.
 528  */
 529 static int
 530 nfs_reconnect(rep)
 531         register struct nfsreq *rep;
 532 {
 533         register struct nfsreq *rp;
 534         register struct nfsmount *nmp = rep->r_nmp;
 535         int error;
 536
 537         nfs_disconnect(nmp);
 538         while ((error = nfs_connect(nmp, rep))) {
 539                 if (error == EINTR || error == ERESTART)
 540                         return (EINTR);
 541                 if (error == EIO)
 542                         return (EIO);
 543                 nfs_down(rep, "can not connect", error);
 544                 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
 545         }
 546
 547         NFS_DPF(DUP, ("nfs_reconnect RESEND\n"));
 548         /*
 549          * Loop through outstanding request list and fix up all requests
 550          * on old socket.
 551          */
 552         TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
 553                 if (rp->r_nmp == nmp)
 554                         rp->r_flags |= R_MUSTRESEND;
 555         }
 556         return (0);
 557 }
 558
 559 /*
 560  * NFS disconnect. Clean up and unlink.
 561  */
 562 void
 563 nfs_disconnect(nmp)
 564         register struct nfsmount *nmp;
 565 {
 566         register struct socket *so;
 567
 568         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 569         if (nmp->nm_so) {
 570                 so = nmp->nm_so;
 571                 nmp->nm_so = (struct socket *)0;
 572                 soshutdown(so, 2);
 573                 soclose(so);
 574         }
 575         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 576 }
 577
 578 /*
 579  * This is the nfs send routine. For connection based socket types, it
 580  * must be called with an nfs_sndlock() on the socket.
 581  * "rep == NULL" indicates that it has been called from a server.
 582  * For the client side:
 583  * - return EINTR if the RPC is terminated, 0 otherwise
 584  * - set R_MUSTRESEND if the send fails for any reason
 585  * - do any cleanup required by recoverable socket errors (???)
 586  * For the server side:
 587  * - return EINTR or ERESTART if interrupted by a signal
 588  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 589  * - do any cleanup required by recoverable socket errors (???)
 590  */
 591 int
 592 nfs_send(so, nam, top, rep)
 593         register struct socket *so;
 594         struct mbuf *nam;
 595         register struct mbuf *top;
 596         struct nfsreq *rep;
 597 {
 598         struct sockaddr *sendnam;
 599         int error, error2, soflags, flags;
 600         int xidqueued = 0;
 601         struct nfsreq *rp;
 602         char savenametolog[MNAMELEN];
 603
 604         if (rep) {
 605                 error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
 606                 if (error) {
 607                         m_freem(top);
 608                         return (error);
 609                 }
 610                 if ((so = rep->r_nmp->nm_so) == NULL) {
 611                         rep->r_flags |= R_MUSTRESEND;
 612                         m_freem(top);
 613                         return (0);
 614                 }
 615                 rep->r_flags &= ~R_MUSTRESEND;
 616                 soflags = rep->r_nmp->nm_soflags;
 617                 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
 618                         if (rp == rep)
 619                                 break;
 620                 if (rp)
 621                         xidqueued = rp->r_xid;
 622         } else
 623                 soflags = so->so_proto->pr_flags;
 624         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) ||
 625             (nam == 0))
 626                 sendnam = (struct sockaddr *)0;
 627         else
 628                 sendnam = mtod(nam, struct sockaddr *);
 629
 630         if (so->so_type == SOCK_SEQPACKET)
 631                 flags = MSG_EOR;
 632         else
 633                 flags = 0;
 634
 635 #if NFSDIAG
 636         if (rep)
 637                 nfsdup(rep);
 638 #endif
 639         /*
 640          * Save the name here in case mount point goes away when we switch
 641          * funnels.  The name is using local stack and is large, but don't
 642          * want to block if we malloc.
 643          */
 644         if (rep)
 645                 strncpy(savenametolog,
 646                         rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
 647                         MNAMELEN);
 648         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 649         error = sosend(so, sendnam, (struct uio *)0, top,
 650                        (struct mbuf *)0, flags);
 651         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 652
 653         if (error) {
 654                 if (rep) {
 655                         if (xidqueued) {
 656                                 TAILQ_FOREACH(rp, &nfs_reqq, r_chain)
 657                                         if (rp == rep && rp->r_xid == xidqueued)
 658                                                 break;
 659                                 if (!rp)
 660                                         panic("nfs_send: error %d xid %x gone",
 661                                               error, xidqueued);
 662                         }
 663                         log(LOG_INFO, "nfs send error %d for server %s\n",
 664                             error, savenametolog);
 665                         /*
 666                          * Deal with errors for the client side.
 667                          */
 668                         error2 = nfs_sigintr(rep->r_nmp, rep, rep->r_procp);
 669                         if (error2) {
 670                                 error = error2;
 671                         } else {
 672                                 rep->r_flags |= R_MUSTRESEND;
 673                                 NFS_DPF(DUP,
 674                                         ("nfs_send RESEND error=%d\n", error));
 675                         }
 676                 } else
 677                         log(LOG_INFO, "nfsd send error %d\n", error);
 678
 679                 /*
 680                  * Handle any recoverable (soft) socket errors here. (???)
 681                  */
 682                 if (error != EINTR && error != ERESTART && error != EIO &&
 683                         error != EWOULDBLOCK && error != EPIPE) {
 684                         error = 0;
 685                 }
 686         }
 687         return (error);
 688 }
 689
 690 /*
 691  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 692  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 693  * Mark and consolidate the data into a new mbuf list.
 694  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 695  *     small mbufs.
 696  * For SOCK_STREAM we must be very careful to read an entire record once
 697  * we have read any of it, even if the system call has been interrupted.
 698  */
 699 static int
 700 nfs_receive(rep, aname, mp)
 701         register struct nfsreq *rep;
 702         struct mbuf **aname;
 703         struct mbuf **mp;
 704 {
 705         register struct socket *so;
 706         struct uio auio;
 707         struct iovec aio;
 708         register struct mbuf *m;
 709         struct mbuf *control;
 710         u_long len;
 711         struct sockaddr **getnam;
 712         struct sockaddr *tmp_nam;
 713         struct mbuf     *mhck;
 714         struct sockaddr_in *sin;
 715         int error, error2, sotype, rcvflg;
 716         struct proc *p = current_proc();        /* XXX */
 717
 718         /*
 719          * Set up arguments for soreceive()
 720          */
 721         *mp = (struct mbuf *)0;
 722         *aname = (struct mbuf *)0;
 723         sotype = rep->r_nmp->nm_sotype;
 724
 725         /*
 726          * For reliable protocols, lock against other senders/receivers
 727          * in case a reconnect is necessary.
 728          * For SOCK_STREAM, first get the Record Mark to find out how much
 729          * more there is to get.
 730          * We must lock the socket against other receivers
 731          * until we have an entire rpc request/reply.
 732          */
 733         if (sotype != SOCK_DGRAM) {
 734                 error = nfs_sndlock(rep);
 735                 if (error)
 736                         return (error);
 737 tryagain:
 738                 /*
 739                  * Check for fatal errors and resending request.
 740                  */
 741                 /*
 742                  * Ugh: If a reconnect attempt just happened, nm_so
 743                  * would have changed. NULL indicates a failed
 744                  * attempt that has essentially shut down this
 745                  * mount point.
 746                  */
 747                 if ((error = nfs_sigintr(rep->r_nmp, rep, p)) || rep->r_mrep) {
 748                         nfs_sndunlock(rep);
 749                         if (error)
 750                                 return (error);
 751                         return (EINTR);
 752                 }
 753                 so = rep->r_nmp->nm_so;
 754                 if (!so) {
 755                         error = nfs_reconnect(rep);
 756                         if (error) {
 757                                 nfs_sndunlock(rep);
 758                                 return (error);
 759                         }
 760                         goto tryagain;
 761                 }
 762                 while (rep->r_flags & R_MUSTRESEND) {
 763                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
 764                         nfsstats.rpcretries++;
 765                         NFS_DPF(DUP,
 766                                 ("nfs_receive RESEND %s\n",
 767                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname));
 768                         error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
 769                         /*
 770                          * we also hold rcv lock so rep is still
 771                          * legit this point
 772                          */
 773                         if (error) {
 774                                 if (error == EINTR || error == ERESTART ||
 775                                     (error = nfs_reconnect(rep))) {
 776                                         nfs_sndunlock(rep);
 777                                         return (error);
 778                                 }
 779                                 goto tryagain;
 780                         }
 781                 }
 782                 nfs_sndunlock(rep);
 783                 if (sotype == SOCK_STREAM) {
 784                         aio.iov_base = (caddr_t) &len;
 785                         aio.iov_len = sizeof(u_long);
 786                         auio.uio_iov = &aio;
 787                         auio.uio_iovcnt = 1;
 788                         auio.uio_segflg = UIO_SYSSPACE;
 789                         auio.uio_rw = UIO_READ;
 790                         auio.uio_offset = 0;
 791                         auio.uio_resid = sizeof(u_long);
 792                         auio.uio_procp = p;
 793                         do {
 794                            rcvflg = MSG_WAITALL;
 795                            thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 796                            error = soreceive(so, (struct sockaddr **)0, &auio,
 797                                 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
 798                            thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 799                            if (!rep->r_nmp) /* if unmounted then bailout */
 800                                 goto shutout;
 801                            if (error == EWOULDBLOCK && rep) {
 802                                 error2 = nfs_sigintr(rep->r_nmp, rep, p);
 803                                 if (error2)
 804                                         error = error2;
 805                            }
 806                         } while (error == EWOULDBLOCK);
 807                         if (!error && auio.uio_resid > 0) {
 808                             log(LOG_INFO,
 809                                  "short receive (%d/%d) from nfs server %s\n",
 810                                  sizeof(u_long) - auio.uio_resid,
 811                                  sizeof(u_long),
 812                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
 813                             error = EPIPE;
 814                         }
 815                         if (error)
 816                                 goto errout;
 817                         len = ntohl(len) & ~0x80000000;
 818                         /*
 819                          * This is SERIOUS! We are out of sync with the sender
 820                          * and forcing a disconnect/reconnect is all I can do.
 821                          */
 822                         if (len > NFS_MAXPACKET) {
 823                             log(LOG_ERR, "%s (%d) from nfs server %s\n",
 824                                 "impossible packet length",
 825                                 len,
 826                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
 827                             error = EFBIG;
 828                             goto errout;
 829                         }
 830                         auio.uio_resid = len;
 831
 832                         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 833                         do {
 834                             rcvflg = MSG_WAITALL;
 835                             error =  soreceive(so, (struct sockaddr **)0,
 836                                 &auio, mp, (struct mbuf **)0, &rcvflg);
 837                             if (!rep->r_nmp) /* if unmounted then bailout */ {
 838                                 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 839                                 goto shutout;
 840                             }
 841                         } while (error == EWOULDBLOCK || error == EINTR ||
 842                                  error == ERESTART);
 843
 844                         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 845
 846                         if (!error && auio.uio_resid > 0) {
 847                             log(LOG_INFO,
 848                                 "short receive (%d/%d) from nfs server %s\n",
 849                                 len - auio.uio_resid, len,
 850                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
 851                             error = EPIPE;
 852                         }
 853                 } else {
 854                         /*
 855                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
 856                          * and soreceive() will return when it has either a
 857                          * control msg or a data msg.
 858                          * We have no use for control msg., but must grab them
 859                          * and then throw them away so we know what is going
 860                          * on.
 861                          */
 862                         auio.uio_resid = len = 100000000; /* Anything Big */
 863                         auio.uio_procp = p;
 864
 865                         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 866                         do {
 867                             rcvflg = 0;
 868                             error =  soreceive(so, (struct sockaddr **)0,
 869                                                &auio, mp, &control, &rcvflg);
 870                             if (control)
 871                                 m_freem(control);
 872                             if (!rep->r_nmp) /* if unmounted then bailout */ {
 873                                 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 874                                 goto shutout;
 875                             }
 876                             if (error == EWOULDBLOCK && rep) {
 877                                 error2 = nfs_sigintr(rep->r_nmp, rep, p);
 878                                 if (error2) {
 879                                         thread_funnel_switch(NETWORK_FUNNEL,
 880                                             KERNEL_FUNNEL);
 881                                         return (error2);
 882                                 }
 883                             }
 884                         } while (error == EWOULDBLOCK ||
 885                                  (!error && *mp == NULL && control));
 886
 887                         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 888
 889                         if ((rcvflg & MSG_EOR) == 0)
 890                                 printf("Egad!!\n");
 891                         if (!error && *mp == NULL)
 892                                 error = EPIPE;
 893                         len -= auio.uio_resid;
 894                 }
 895 errout:
 896                 if (error && error != EINTR && error != ERESTART) {
 897                         m_freem(*mp);
 898                         *mp = (struct mbuf *)0;
 899                         if (error != EPIPE)
 900                                 log(LOG_INFO,
 901                                     "receive error %d from nfs server %s\n",
 902                                     error,
 903                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
 904                         error = nfs_sndlock(rep);
 905                         if (!error)
 906                                 error = nfs_reconnect(rep);
 907                         if (!error)
 908                                 goto tryagain;
 909                 }
 910         } else {
 911                 /*
 912                  * We could have failed while rebinding the datagram socket
 913                  * so we need to attempt to rebind here.
 914                  */
 915                 if ((so = rep->r_nmp->nm_so) == NULL) {
 916                         error = nfs_sndlock(rep);
 917                         if (!error) {
 918                                 error = nfs_reconnect(rep);
 919                                 nfs_sndunlock(rep);
 920                         }
 921                         if (error)
 922                                 return (error);
 923                         if (!rep->r_nmp) /* if unmounted then bailout */
 924                                 return (ENXIO);
 925                         so = rep->r_nmp->nm_so;
 926                 }
 927                 if (so->so_state & SS_ISCONNECTED)
 928                         getnam = (struct sockaddr **)0;
 929                 else
 930                         getnam = &tmp_nam;;
 931                 auio.uio_resid = len = 1000000;
 932                 auio.uio_procp = p;
 933
 934                 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 935                 do {
 936                         rcvflg = 0;
 937                         error =  soreceive(so, getnam, &auio, mp,
 938                                 (struct mbuf **)0, &rcvflg);
 939
 940                         if ((getnam) && (*getnam)) {
 941                             MGET(mhck, M_WAIT, MT_SONAME);
 942                             mhck->m_len = (*getnam)->sa_len;
 943                             sin = mtod(mhck, struct sockaddr_in *);
 944                             bcopy(*getnam, sin, sizeof(struct sockaddr_in));
 945                             mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
 946                             FREE(*getnam, M_SONAME);
 947                             *aname = mhck;
 948                         }
 949                         if (!rep->r_nmp) /* if unmounted then bailout */
 950                                 goto dgramout;
 951                         if (error) {
 952                                 error2 = nfs_sigintr(rep->r_nmp, rep, p);
 953                                 if (error2) {
 954                                         error = error2;
 955                                         goto dgramout;
 956                                 }
 957                         }
 958                         /* Reconnect for all errors.  We may be receiving
 959                          * soft/hard/blocking errors because of a network
 960                          * change.
 961                          * XXX: we should rate limit or delay this
 962                          * to once every N attempts or something.
 963                          * although TCP doesn't seem to.
 964                          */
 965                         if (error) {
 966                                 thread_funnel_switch(NETWORK_FUNNEL,
 967                                     KERNEL_FUNNEL);
 968                                 error2 = nfs_sndlock(rep);
 969                                 if (!error2) {
 970                                         error2 = nfs_reconnect(rep);
 971                                         if (error2)
 972                                                 error = error2;
 973                                         else if (!rep->r_nmp) /* if unmounted then bailout */
 974                                                 error = ENXIO;
 975                                         else
 976                                                 so = rep->r_nmp->nm_so;
 977                                         nfs_sndunlock(rep);
 978                                 } else {
 979                                         error = error2;
 980                                 }
 981                                 thread_funnel_switch(KERNEL_FUNNEL,
 982                                     NETWORK_FUNNEL);
 983                         }
 984                 } while (error == EWOULDBLOCK);
 985
 986 dgramout:
 987                 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 988                 len -= auio.uio_resid;
 989         }
 990 shutout:
 991         if (error) {
 992                 m_freem(*mp);
 993                 *mp = (struct mbuf *)0;
 994         }
 995         return (error);
 996 }
 997
 998 /*
 999  * Implement receipt of reply on a socket.
1000  * We must search through the list of received datagrams matching them
1001  * with outstanding requests using the xid, until ours is found.
1002  */
1003 /* ARGSUSED */
1004 int
1005 nfs_reply(myrep)
1006         struct nfsreq *myrep;
1007 {
1008         register struct nfsreq *rep;
1009         register struct nfsmount *nmp = myrep->r_nmp;
1010         register long t1;
1011         struct mbuf *mrep, *md;
1012         struct mbuf *nam;
1013         u_long rxid, *tl;
1014         caddr_t dpos, cp2;
1015         int error;
1016
1017         /*
1018          * Loop around until we get our own reply
1019          */
1020         for (;;) {
1021                 /*
1022                  * Lock against other receivers so that I don't get stuck in
1023                  * sbwait() after someone else has received my reply for me.
1024                  * Also necessary for connection based protocols to avoid
1025                  * race conditions during a reconnect.
1026                  * If nfs_rcvlock() returns EALREADY, that means that
1027                  * the reply has already been recieved by another
1028                  * process and we can return immediately.  In this
1029                  * case, the lock is not taken to avoid races with
1030                  * other processes.
1031                  */
1032                 error = nfs_rcvlock(myrep);
1033                 if (error == EALREADY)
1034                         return (0);
1035                 if (error)
1036                         return (error);
1037
1038                 /*
1039                  * If we slept after putting bits otw, then reply may have
1040                  * arrived.  In which case returning is required, or we
1041                  * would hang trying to nfs_receive an already received reply.
1042                  */
1043                 if (myrep->r_mrep != NULL) {
1044                         nfs_rcvunlock(myrep);
1045                         FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
1046                         return (0);
1047                 }
1048                 /*
1049                  * Get the next Rpc reply off the socket. Assume myrep->r_nmp
1050                  * is still intact by checks done in nfs_rcvlock.
1051                  */
1052                 error = nfs_receive(myrep, &nam, &mrep);
1053                 if (nam)
1054                         m_freem(nam);
1055                 /*
1056                  * Bailout asap if nfsmount struct gone (unmounted).
1057                  */
1058                 if (!myrep->r_nmp || !nmp->nm_so) {
1059                         FSDBG(530, myrep->r_xid, myrep, nmp, -2);
1060                         return (ENXIO);
1061                 }
1062                 if (error) {
1063                         FSDBG(530, myrep->r_xid, myrep, nmp, error);
1064                         nfs_rcvunlock(myrep);
1065
1066                         /* Bailout asap if nfsmount struct gone (unmounted). */
1067                         if (!myrep->r_nmp || !nmp->nm_so)
1068                                 return (ENXIO);
1069
1070                         /*
1071                          * Ignore routing errors on connectionless protocols??
1072                          */
1073                         if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
1074                                 nmp->nm_so->so_error = 0;
1075                                 if (myrep->r_flags & R_GETONEREP)
1076                                         return (0);
1077                                 continue;
1078                         }
1079                         return (error);
1080                 }
1081
1082                 /*
1083                  * We assume all is fine, but if we did not have an error
1084                  * and mrep is 0, better not dereference it. nfs_receieve
1085                  * calls soreceive which carefully sets error=0 when it got
1086                  * errors on sbwait (tsleep). In most cases, I assume that's
1087                  * so we could go back again. In tcp case, EPIPE is returned.
1088                  * In udp, case nfs_receive gets back here with no error and no
1089                  * mrep. Is the right fix to have soreceive check for process
1090                  * aborted after sbwait and return something non-zero? Should
1091                  * nfs_receive give an EPIPE?  Too risky to play with those
1092                  * two this late in game for a shutdown problem. Instead,
1093                  * just check here and get out. (ekn)
1094                  */
1095                 if (!mrep) {
1096                         FSDBG(530, myrep->r_xid, myrep, nmp, -3);
1097                         return (ENXIO); /* sounds good */
1098                 }
1099
1100                 /*
1101                  * Get the xid and check that it is an rpc reply
1102                  */
1103                 md = mrep;
1104                 dpos = mtod(md, caddr_t);
1105                 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
1106                 rxid = *tl++;
1107                 if (*tl != rpc_reply) {
1108 #ifndef NFS_NOSERVER
1109                         if (nmp->nm_flag & NFSMNT_NQNFS) {
1110                                 if (nqnfs_callback(nmp, mrep, md, dpos))
1111                                         nfsstats.rpcinvalid++;
1112                         } else {
1113                                 nfsstats.rpcinvalid++;
1114                                 m_freem(mrep);
1115                         }
1116 #else
1117                         nfsstats.rpcinvalid++;
1118                         m_freem(mrep);
1119 #endif
1120 nfsmout:
1121                         if (nmp->nm_state & NFSSTA_RCVLOCK)
1122                                 nfs_rcvunlock(myrep);
1123                         if (myrep->r_flags & R_GETONEREP)
1124                                 return (0); /* this path used by NQNFS */
1125                         continue;
1126                 }
1127
1128                 /*
1129                  * Loop through the request list to match up the reply
1130                  * Iff no match, just drop the datagram
1131                  */
1132                 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1133                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1134                                 /* Found it.. */
1135                                 rep->r_mrep = mrep;
1136                                 rep->r_md = md;
1137                                 rep->r_dpos = dpos;
1138                                 /*
1139                                  * If we're tracking the round trip time
1140                                  * then we update the circular log here
1141                                  * with the stats from our current request.
1142                                  */
1143                                 if (nfsrtton) {
1144                                         struct rttl *rt;
1145
1146                                         rt = &nfsrtt.rttl[nfsrtt.pos];
1147                                         rt->proc = rep->r_procnum;
1148                                         rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
1149                                         rt->sent = nmp->nm_sent;
1150                                         rt->cwnd = nmp->nm_cwnd;
1151                                         if (proct[rep->r_procnum] == 0)
1152                                                 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1153                                         rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
1154                                         rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
1155                                         rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
1156                                         microtime(&rt->tstamp); // XXX unused
1157                                         if (rep->r_flags & R_TIMING)
1158                                                 rt->rtt = rep->r_rtt;
1159                                         else
1160                                                 rt->rtt = 1000000;
1161                                         nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
1162                                 }
1163                                 /*
1164                                  * Update congestion window.
1165                                  * Do the additive increase of
1166                                  * one rpc/rtt.
1167                                  */
1168                                 FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
1169                                       nmp->nm_cwnd);
1170                                 if (nmp->nm_cwnd <= nmp->nm_sent) {
1171                                         nmp->nm_cwnd +=
1172                                            (NFS_CWNDSCALE * NFS_CWNDSCALE +
1173                                            (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
1174                                         if (nmp->nm_cwnd > NFS_MAXCWND)
1175                                                 nmp->nm_cwnd = NFS_MAXCWND;
1176                                 }
1177                                 if (rep->r_flags & R_SENT) {
1178                                     rep->r_flags &= ~R_SENT;
1179                                     nmp->nm_sent -= NFS_CWNDSCALE;
1180                                }
1181                                 /*
1182                                  * Update rtt using a gain of 0.125 on the mean
1183                                  * and a gain of 0.25 on the deviation.
1184                                  */
1185                                 if (rep->r_flags & R_TIMING) {
1186                                         /*
1187                                          * Since the timer resolution of
1188                                          * NFS_HZ is so course, it can often
1189                                          * result in r_rtt == 0. Since
1190                                          * r_rtt == N means that the actual
1191                                          * rtt is between N+dt and N+2-dt ticks,
1192                                          * add 1.
1193                                          */
1194                                         if (proct[rep->r_procnum] == 0)
1195                                                 panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
1196                                         t1 = rep->r_rtt + 1;
1197                                         t1 -= (NFS_SRTT(rep) >> 3);
1198                                         NFS_SRTT(rep) += t1;
1199                                         if (t1 < 0)
1200                                                 t1 = -t1;
1201                                         t1 -= (NFS_SDRTT(rep) >> 2);
1202                                         NFS_SDRTT(rep) += t1;
1203                                 }
1204                                 nmp->nm_timeouts = 0;
1205                                 break;
1206                         }
1207                 }
1208                 nfs_rcvunlock(myrep);
1209                 /*
1210                  * If not matched to a request, drop it.
1211                  * If it's mine, get out.
1212                  */
1213                 if (rep == 0) {
1214                         nfsstats.rpcunexpected++;
1215                         m_freem(mrep);
1216                 } else if (rep == myrep) {
1217                         if (rep->r_mrep == NULL)
1218                                 panic("nfs_reply: nil r_mrep");
1219                         return (0);
1220                 }
1221                 FSDBG(530, myrep->r_xid, myrep, rep,
1222                       rep ? rep->r_xid : myrep->r_flags);
1223                 if (myrep->r_flags & R_GETONEREP)
1224                         return (0); /* this path used by NQNFS */
1225         }
1226 }
1227
1228 /*
1229  * nfs_request - goes something like this
1230  *      - fill in request struct
1231  *      - links it into list
1232  *      - calls nfs_send() for first transmit
1233  *      - calls nfs_reply() to wait for the reply (via nfs_receive())
1234  *      - break down rpc header and return with nfs reply pointed to
1235  *        by mrep or error
1236  * nb: always frees up mreq mbuf list
1237  */
1238 int
1239 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
1240         struct vnode *vp;
1241         struct mbuf *mrest;
1242         int procnum;
1243         struct proc *procp;
1244         struct ucred *cred;
1245         struct mbuf **mrp;
1246         struct mbuf **mdp;
1247         caddr_t *dposp;
1248         u_int64_t *xidp;
1249 {
1250         register struct mbuf *m, *mrep, *m2;
1251         register struct nfsreq *rep, *rp;
1252         register u_long *tl;
1253         register int i;
1254         struct nfsmount *nmp;
1255         struct mbuf *md, *mheadend;
1256         struct nfsnode *np;
1257         char nickv[RPCX_NICKVERF];
1258         time_t reqtime, waituntil;
1259         caddr_t dpos, cp2;
1260         int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
1261         int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
1262         int verf_len, verf_type;
1263         u_long xid;
1264         u_quad_t frev;
1265         char *auth_str, *verf_str;
1266         NFSKERBKEY_T key;               /* save session key */
1267         int nmsotype;
1268         struct timeval now;
1269
1270         if (xidp)
1271                 *xidp = 0;
1272
1273         MALLOC_ZONE(rep, struct nfsreq *,
1274                     sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1275
1276         nmp = VFSTONFS(vp->v_mount);
1277         if (nmp == NULL ||
1278             (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
1279             (NFSSTA_FORCE|NFSSTA_TIMEO)) {
1280                 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1281                 return (ENXIO);
1282         }
1283         nmsotype = nmp->nm_sotype;
1284
1285         FSDBG_TOP(531, vp, procnum, nmp, rep);
1286
1287         rep->r_nmp = nmp;
1288         rep->r_vp = vp;
1289         rep->r_procp = procp;
1290         rep->r_procnum = procnum;
1291         microuptime(&now);
1292         rep->r_lastmsg = now.tv_sec -
1293             ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
1294         i = 0;
1295         m = mrest;
1296         while (m) {
1297                 i += m->m_len;
1298                 m = m->m_next;
1299         }
1300         mrest_len = i;
1301
1302         /*
1303          * Get the RPC header with authorization.
1304          */
1305 kerbauth:
1306         nmp = VFSTONFS(vp->v_mount);
1307         if (!nmp) {
1308                 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1309                 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1310                 return (ENXIO);
1311         }
1312         verf_str = auth_str = (char *)0;
1313         if (nmp->nm_flag & NFSMNT_KERB) {
1314                 verf_str = nickv;
1315                 verf_len = sizeof (nickv);
1316                 auth_type = RPCAUTH_KERB4;
1317                 bzero((caddr_t)key, sizeof (key));
1318                 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
1319                         &auth_len, verf_str, verf_len)) {
1320                         nmp = VFSTONFS(vp->v_mount);
1321                         if (!nmp) {
1322                                 FSDBG_BOT(531, 2, vp, error, rep);
1323                                 FREE_ZONE((caddr_t)rep,
1324                                         sizeof (struct nfsreq), M_NFSREQ);
1325                                 m_freem(mrest);
1326                                 return (ENXIO);
1327                         }
1328                         error = nfs_getauth(nmp, rep, cred, &auth_str,
1329                                 &auth_len, verf_str, &verf_len, key);
1330                         nmp = VFSTONFS(vp->v_mount);
1331                         if (!error && !nmp)
1332                                 error = ENXIO;
1333                         if (error) {
1334                                 FSDBG_BOT(531, 2, vp, error, rep);
1335                                 FREE_ZONE((caddr_t)rep,
1336                                         sizeof (struct nfsreq), M_NFSREQ);
1337                                 m_freem(mrest);
1338                                 return (error);
1339                         }
1340                 }
1341         } else {
1342                 auth_type = RPCAUTH_UNIX;
1343                 if (cred->cr_ngroups < 1)
1344                         panic("nfsreq nogrps");
1345                 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1346                         nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1347                         5 * NFSX_UNSIGNED;
1348         }
1349         m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1350              auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1351         if (xidp)
1352                 *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32);
1353         if (auth_str)
1354                 _FREE(auth_str, M_TEMP);
1355
1356         /*
1357          * For stream protocols, insert a Sun RPC Record Mark.
1358          */
1359         if (nmsotype == SOCK_STREAM) {
1360                 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1361                 *mtod(m, u_long *) = htonl(0x80000000 |
1362                                            (m->m_pkthdr.len - NFSX_UNSIGNED));
1363         }
1364         rep->r_mreq = m;
1365         rep->r_xid = xid;
1366 tryagain:
1367         nmp = VFSTONFS(vp->v_mount);
1368         if (nmp && (nmp->nm_flag & NFSMNT_SOFT))
1369                 rep->r_retry = nmp->nm_retry;
1370         else
1371                 rep->r_retry = NFS_MAXREXMIT + 1;       /* past clip limit */
1372         rep->r_rtt = rep->r_rexmit = 0;
1373         if (proct[procnum] > 0)
1374                 rep->r_flags = R_TIMING;
1375         else
1376                 rep->r_flags = 0;
1377         rep->r_mrep = NULL;
1378
1379         /*
1380          * Do the client side RPC.
1381          */
1382         nfsstats.rpcrequests++;
1383         /*
1384          * Chain request into list of outstanding requests. Be sure
1385          * to put it LAST so timer finds oldest requests first.
1386          */
1387         s = splsoftclock();
1388         TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1389
1390         /* Get send time for nqnfs */
1391         microtime(&now);
1392         reqtime = now.tv_sec;
1393
1394         /*
1395          * If backing off another request or avoiding congestion, don't
1396          * send this one now but let timer do it. If not timing a request,
1397          * do it now.
1398          */
1399         if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1400                            (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1401                            nmp->nm_sent < nmp->nm_cwnd)) {
1402                 int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED);
1403
1404                 splx(s);
1405                 if (connrequired)
1406                         error = nfs_sndlock(rep);
1407
1408                 /*
1409                  * Set the R_SENT before doing the send in case another thread
1410                  * processes the reply before the nfs_send returns here
1411                  */
1412                 if (!error) {
1413                         if ((rep->r_flags & R_MUSTRESEND) == 0) {
1414                                 FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
1415                                       nmp->nm_cwnd);
1416                                 nmp->nm_sent += NFS_CWNDSCALE;
1417                                 rep->r_flags |= R_SENT;
1418                         }
1419
1420                         m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
1421                         error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
1422                         if (connrequired)
1423                                 nfs_sndunlock(rep);
1424                 }
1425                 nmp = VFSTONFS(vp->v_mount);
1426                 if (error) {
1427                         if (nmp)
1428                                 nmp->nm_sent -= NFS_CWNDSCALE;
1429                         rep->r_flags &= ~R_SENT;
1430                 }
1431         } else {
1432                 splx(s);
1433                 rep->r_rtt = -1;
1434         }
1435
1436         /*
1437          * Wait for the reply from our send or the timer's.
1438          */
1439         if (!error || error == EPIPE)
1440                 error = nfs_reply(rep);
1441
1442         /*
1443          * RPC done, unlink the request.
1444          */
1445         nfs_repdequeue(rep);
1446
1447         nmp = VFSTONFS(vp->v_mount);
1448
1449         /*
1450          * Decrement the outstanding request count.
1451          */
1452         if (rep->r_flags & R_SENT) {
1453                 rep->r_flags &= ~R_SENT;        /* paranoia */
1454                 if (nmp) {
1455                         FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
1456                         nmp->nm_sent -= NFS_CWNDSCALE;
1457                 }
1458         }
1459
1460         /*
1461          * If there was a successful reply and a tprintf msg.
1462          * tprintf a response.
1463          */
1464         nfs_up(rep, "is alive again", error);
1465         mrep = rep->r_mrep;
1466         md = rep->r_md;
1467         dpos = rep->r_dpos;
1468         if (!error && !nmp)
1469                 error = ENXIO;
1470         if (error) {
1471                 m_freem(rep->r_mreq);
1472                 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1473                 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1474                 return (error);
1475         }
1476
1477         /*
1478          * break down the rpc header and check if ok
1479          */
1480         nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1481         if (*tl++ == rpc_msgdenied) {
1482                 if (*tl == rpc_mismatch)
1483                         error = EOPNOTSUPP;
1484                 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1485                         if (!failed_auth) {
1486                                 failed_auth++;
1487                                 mheadend->m_next = (struct mbuf *)0;
1488                                 m_freem(mrep);
1489                                 m_freem(rep->r_mreq);
1490                                 goto kerbauth;
1491                         } else
1492                                 error = EAUTH;
1493                 } else
1494                         error = EACCES;
1495                 m_freem(mrep);
1496                 m_freem(rep->r_mreq);
1497                 FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1498                 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1499                 return (error);
1500         }
1501
1502         /*
1503          * Grab any Kerberos verifier, otherwise just throw it away.
1504          */
1505         verf_type = fxdr_unsigned(int, *tl++);
1506         i = fxdr_unsigned(int, *tl);
1507         if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1508                 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1509                 if (error)
1510                         goto nfsmout;
1511         } else if (i > 0)
1512                 nfsm_adv(nfsm_rndup(i));
1513         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1514         /* 0 == ok */
1515         if (*tl == 0) {
1516                 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1517                 if (*tl != 0) {
1518                         error = fxdr_unsigned(int, *tl);
1519                         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1520                                 error == NFSERR_TRYLATER) {
1521                                 m_freem(mrep);
1522                                 error = 0;
1523                                 microuptime(&now);
1524                                 waituntil = now.tv_sec + trylater_delay;
1525                                 NFS_DPF(DUP,
1526                                         ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
1527                                          nmp->nm_mountp->mnt_stat.f_mntfromname,
1528                                          nmp->nm_flag, trylater_cnt, waituntil,
1529                                          trylater_delay));
1530                                 while (now.tv_sec < waituntil) {
1531                                         (void)tsleep((caddr_t)&lbolt,
1532                                                      PSOCK, "nqnfstry", 0);
1533                                         microuptime(&now);
1534                                 }
1535                                 trylater_delay *= 2;
1536                                 if (trylater_delay > 60)
1537                                         trylater_delay = 60;
1538                                 if (trylater_cnt < 7)
1539                                         trylater_cnt++;
1540                                 goto tryagain;
1541                         }
1542
1543                         /*
1544                          * If the File Handle was stale, invalidate the
1545                          * lookup cache, just in case.
1546                          */
1547                         if (error == ESTALE)
1548                                 cache_purge(vp);
1549                         if (nmp->nm_flag & NFSMNT_NFSV3) {
1550                                 *mrp = mrep;
1551                                 *mdp = md;
1552                                 *dposp = dpos;
1553                                 error |= NFSERR_RETERR;
1554                         } else
1555                                 m_freem(mrep);
1556                         m_freem(rep->r_mreq);
1557                         FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1558                         FREE_ZONE((caddr_t)rep,
1559                                    sizeof (struct nfsreq), M_NFSREQ);
1560                         return (error);
1561                 }
1562
1563                 /*
1564                  * For nqnfs, get any lease in reply
1565                  */
1566                 if (nmp->nm_flag & NFSMNT_NQNFS) {
1567                         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1568                         if (*tl) {
1569                                 np = VTONFS(vp);
1570                                 nqlflag = fxdr_unsigned(int, *tl);
1571                                 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
1572                                 cachable = fxdr_unsigned(int, *tl++);
1573                                 reqtime += fxdr_unsigned(int, *tl++);
1574                                 microtime(&now);
1575                                 if (reqtime > now.tv_sec) {
1576                                     fxdr_hyper(tl, &frev);
1577                                     nqnfs_clientlease(nmp, np, nqlflag,
1578                                                       cachable, reqtime, frev);
1579                                 }
1580                         }
1581                 }
1582                 *mrp = mrep;
1583                 *mdp = md;
1584                 *dposp = dpos;
1585                 m_freem(rep->r_mreq);
1586                 FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
1587                 FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1588                 return (0);
1589         }
1590         m_freem(mrep);
1591         error = EPROTONOSUPPORT;
1592 nfsmout:
1593         m_freem(rep->r_mreq);
1594         FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
1595         FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1596         return (error);
1597 }
1598
1599 #ifndef NFS_NOSERVER
1600 /*
1601  * Generate the rpc reply header
1602  * siz arg. is used to decide if adding a cluster is worthwhile
1603  */
1604 int
1605 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1606         int siz;
1607         struct nfsrv_descript *nd;
1608         struct nfssvc_sock *slp;
1609         int err;
1610         int cache;
1611         u_quad_t *frev;
1612         struct mbuf **mrq;
1613         struct mbuf **mbp;
1614         caddr_t *bposp;
1615 {
1616         register u_long *tl;
1617         register struct mbuf *mreq;
1618         caddr_t bpos;
1619         struct mbuf *mb, *mb2;
1620
1621         MGETHDR(mreq, M_WAIT, MT_DATA);
1622         mb = mreq;
1623         /*
1624          * If this is a big reply, use a cluster else
1625          * try and leave leading space for the lower level headers.
1626          */
1627         siz += RPC_REPLYSIZ;
1628         if (siz >= MINCLSIZE) {
1629                 MCLGET(mreq, M_WAIT);
1630         } else
1631                 mreq->m_data += max_hdr;
1632         tl = mtod(mreq, u_long *);
1633         mreq->m_len = 6 * NFSX_UNSIGNED;
1634         bpos = ((caddr_t)tl) + mreq->m_len;
1635         *tl++ = txdr_unsigned(nd->nd_retxid);
1636         *tl++ = rpc_reply;
1637         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1638                 *tl++ = rpc_msgdenied;
1639                 if (err & NFSERR_AUTHERR) {
1640                         *tl++ = rpc_autherr;
1641                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1642                         mreq->m_len -= NFSX_UNSIGNED;
1643                         bpos -= NFSX_UNSIGNED;
1644                 } else {
1645                         *tl++ = rpc_mismatch;
1646                         *tl++ = txdr_unsigned(RPC_VER2);
1647                         *tl = txdr_unsigned(RPC_VER2);
1648                 }
1649         } else {
1650                 *tl++ = rpc_msgaccepted;
1651
1652                 /*
1653                  * For Kerberos authentication, we must send the nickname
1654                  * verifier back, otherwise just RPCAUTH_NULL.
1655                  */
1656                 if (nd->nd_flag & ND_KERBFULL) {
1657                     register struct nfsuid *nuidp;
1658                     struct timeval ktvin, ktvout;
1659
1660                     for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1661                         nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1662                         if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1663                             (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1664                              &nuidp->nu_haddr, nd->nd_nam2)))
1665                             break;
1666                     }
1667                     if (nuidp) {
1668                         ktvin.tv_sec =
1669                             txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1670                         ktvin.tv_usec =
1671                             txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1672
1673                         /*
1674                          * Encrypt the timestamp in ecb mode using the
1675                          * session key.
1676                          */
1677 #if NFSKERB
1678                         XXX
1679 #endif
1680
1681                         *tl++ = rpc_auth_kerb;
1682                         *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1683                         *tl = ktvout.tv_sec;
1684                         nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1685                         *tl++ = ktvout.tv_usec;
1686                         *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1687                     } else {
1688                         *tl++ = 0;
1689                         *tl++ = 0;
1690                     }
1691                 } else {
1692                         *tl++ = 0;
1693                         *tl++ = 0;
1694                 }
1695                 switch (err) {
1696                 case EPROGUNAVAIL:
1697                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1698                         break;
1699                 case EPROGMISMATCH:
1700                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
1701                         nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1702                         if (nd->nd_flag & ND_NQNFS) {
1703                                 *tl++ = txdr_unsigned(3);
1704                                 *tl = txdr_unsigned(3);
1705                         } else {
1706                                 *tl++ = txdr_unsigned(2);
1707                                 *tl = txdr_unsigned(3);
1708                         }
1709                         break;
1710                 case EPROCUNAVAIL:
1711                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1712                         break;
1713                 case EBADRPC:
1714                         *tl = txdr_unsigned(RPC_GARBAGE);
1715                         break;
1716                 default:
1717                         *tl = 0;
1718                         if (err != NFSERR_RETVOID) {
1719                                 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1720                                 if (err)
1721                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1722                                 else
1723                                     *tl = 0;
1724                         }
1725                         break;
1726                 };
1727         }
1728
1729         /*
1730          * For nqnfs, piggyback lease as requested.
1731          */
1732         if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1733                 if (nd->nd_flag & ND_LEASE) {
1734                         nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1735                         *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1736                         *tl++ = txdr_unsigned(cache);
1737                         *tl++ = txdr_unsigned(nd->nd_duration);
1738                         txdr_hyper(frev, tl);
1739                 } else {
1740                         nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1741                         *tl = 0;
1742                 }
1743         }
1744         if (mrq != NULL)
1745                 *mrq = mreq;
1746         *mbp = mb;
1747         *bposp = bpos;
1748         if (err != 0 && err != NFSERR_RETVOID)
1749                 nfsstats.srvrpc_errs++;
1750         return (0);
1751 }
1752
1753
1754 #endif /* NFS_NOSERVER */
1755
1756
1757 /*
1758  * From FreeBSD 1.58, a Matt Dillon fix...
1759  * Flag a request as being about to terminate.
1760  * The nm_sent count is decremented now to avoid deadlocks when the process
1761  * in soreceive() hasn't yet managed to send its own request.
1762  */
1763 static void
1764 nfs_softterm(struct nfsreq *rep)
1765 {
1766
1767         rep->r_flags |= R_SOFTTERM;
1768         if (rep->r_flags & R_SENT) {
1769                 FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
1770                       rep->r_nmp->nm_cwnd);
1771                 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1772                 rep->r_flags &= ~R_SENT;
1773         }
1774 }
1775
1776 void
1777 nfs_timer_funnel(arg)
1778         void * arg;
1779 {
1780         (void) thread_funnel_set(kernel_flock, TRUE);
1781         nfs_timer(arg);
1782         (void) thread_funnel_set(kernel_flock, FALSE);
1783
1784 }
1785
1786 /*
1787  * Ensure rep isn't in use by the timer, then dequeue it.
1788  */
1789 void
1790 nfs_repdequeue(struct nfsreq *rep)
1791 {
1792         int s;
1793
1794         while ((rep->r_flags & R_BUSY)) {
1795                 rep->r_flags |= R_WAITING;
1796                 tsleep(rep, PSOCK, "repdeq", 0);
1797         }
1798         s = splsoftclock();
1799         TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1800         splx(s);
1801 }
1802
1803 /*
1804  * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
1805  * free()'d out from under it.
1806  */
1807 void
1808 nfs_repbusy(struct nfsreq *rep)
1809 {
1810
1811         if ((rep->r_flags & R_BUSY))
1812                 panic("rep locked");
1813         rep->r_flags |= R_BUSY;
1814 }
1815
1816 /*
1817  * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
1818  */
1819 struct nfsreq *
1820 nfs_repnext(struct nfsreq *rep)
1821 {
1822         struct nfsreq * nextrep;
1823
1824         if (rep == NULL)
1825                 return (NULL);
1826         /*
1827          * We need to get and busy the next req before signalling the
1828          * current one, otherwise wakeup() may block us and we'll race to
1829          * grab the next req.
1830          */
1831         nextrep = TAILQ_NEXT(rep, r_chain);
1832         if (nextrep != NULL)
1833                 nfs_repbusy(nextrep);
1834         /* unbusy and signal. */
1835         rep->r_flags &= ~R_BUSY;
1836         if ((rep->r_flags & R_WAITING)) {
1837                 rep->r_flags &= ~R_WAITING;
1838                 wakeup(rep);
1839         }
1840         return (nextrep);
1841 }
1842
1843 /*
1844  * Nfs timer routine
1845  * Scan the nfsreq list and retransmit any requests that have timed out
1846  * To avoid retransmission attempts on STREAM sockets (in the future) make
1847  * sure to set the r_retry field to 0 (implies nm_retry == 0).
      *
      * Unless NFS_NOSERVER is defined, the routine also calls
      * nqnfs_serverd() whenever the uptime second has changed and wakes
      * an nfsd for every server socket whose first gathered write is due.
      * Before returning it re-arms itself through timeout() with
      * nfs_timer_funnel and nfs_ticks.
1848  */
1849 void
1850 nfs_timer(arg)
1851         void *arg;      /* never used */
1852 {
1853         register struct nfsreq *rep;
1854         register struct mbuf *m;
1855         register struct socket *so;
1856         register struct nfsmount *nmp;
1857         register int timeo;
1858         int s, error;
1859 #ifndef NFS_NOSERVER
1860         static long lasttime = 0;
1861         register struct nfssvc_sock *slp;
1862         u_quad_t cur_usec;
1863 #endif /* NFS_NOSERVER */
1864 #if NFSDIAG
1865         int rttdiag;
1866 #endif
1867         int flags, rexmit, cwnd, sent;
1868         u_long xid;
1869         struct timeval now;
1870
1871         s = splnet();
1872         /*
1873          * XXX If preemptable threads are implemented the spls used for the
1874          * outstanding request queue must be replaced with mutexes.
1875          */
1876 #ifdef NFSTRACESUSPENDERS
1877         if (NFSTRACE_SUSPENDING) {
1878                 TAILQ_FOREACH(rep, &nfs_reqq, r_chain)
1879                         if (rep->r_xid == nfstracexid)
1880                                 break;
1881                 if (!rep) {
1882                         NFSTRACE_RESUME;
1883                 } else if (NFSTRACE_SUSPENSEOVER) {
1884                         NFSTRACE_SUSPEND;
1885                 }
1886         }
1887 #endif
             /*
              * Busy the head of the queue before looking at it;
              * nfs_repdequeue() waits while R_BUSY is set.  nfs_repnext()
              * then hands the busy mark from request to request.
              */
1888         rep = TAILQ_FIRST(&nfs_reqq);
1889         if (rep != NULL)
1890                 nfs_repbusy(rep);
1891         microuptime(&now);
1892         for ( ; rep != NULL ; rep = nfs_repnext(rep)) {
1893 #ifdef NFSTRACESUSPENDERS
1894                 if (rep->r_mrep && !NFSTRACE_SUSPENDING) {
1895                         nfstracexid = rep->r_xid;
1896                         NFSTRACE_STARTSUSPENDCOUNTDOWN;
1897                 }
1898 #endif
1899                 nmp = rep->r_nmp;
1900                 if (!nmp) /* unmounted */
1901                     continue;
                     /* A reply is already attached or the request was soft-terminated. */
1902                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1903                         continue;
                     /* nfs_sigintr() reported a termination condition: leave it alone. */
1904                 if (nfs_sigintr(nmp, rep, rep->r_procp))
1905                         continue;
                     /*
                      * Report "not responding" once the request has been
                      * retransmitted more than twice or a resend failed, but
                      * no more often than every nm_tprintf_delay seconds.
                      */
1906                 if (nmp->nm_tprintf_initial_delay != 0 &&
1907                     (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
1908                     rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
1909                         rep->r_lastmsg = now.tv_sec;
1910                         nfs_down(rep, "not responding", 0);
1911                 }
                     /*
                      * r_rtt >= 0: the request is being timed.  Count this
                      * tick and skip the request unless its timeout expired.
                      * (r_rtt is set to -1 below while a send is pending.)
                      */
1912                 if (rep->r_rtt >= 0) {
1913                         rep->r_rtt++;
1914                         if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1915                                 timeo = nmp->nm_timeo;
1916                         else
1917                                 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1918                         /* ensure 62.5 ms floor */
1919                         while (16 * timeo < hz)
1920                             timeo *= 2;
1921                         if (nmp->nm_timeouts > 0)
1922                                 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1923                         if (rep->r_rtt <= timeo)
1924                                 continue;
1925                         if (nmp->nm_timeouts < 8)
1926                                 nmp->nm_timeouts++;
1927                 }
1928                 /*
1929                  * Check for too many retransmits.  This is never true for
1930                  * 'hard' mounts because we set r_retry to NFS_MAXREXMIT + 1
1931                  * and never allow r_rexmit to be more than NFS_MAXREXMIT.
1932                  */
1933                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
1934                         nfsstats.rpctimeouts++;
1935                         nfs_softterm(rep);
1936                         continue;
1937                 }
                     /* Non-datagram sockets: only count the timeout, no resend here. */
1938                 if (nmp->nm_sotype != SOCK_DGRAM) {
1939                         if (++rep->r_rexmit > NFS_MAXREXMIT)
1940                                 rep->r_rexmit = NFS_MAXREXMIT;
1941                         continue;
1942                 }
1943                 if ((so = nmp->nm_so) == NULL)
1944                         continue;
1945
1946                 /*
1947                  * If there is enough space and the window allows..
1948                  *      Resend it
1949                  * Set r_rtt to -1 in case we fail to send it now.
1950                  */
1951 #if NFSDIAG
1952                 rttdiag = rep->r_rtt;
1953 #endif
1954                 rep->r_rtt = -1;
1955                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1956                    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1957                     (rep->r_flags & R_SENT) ||
1958                     nmp->nm_sent < nmp->nm_cwnd) &&
1959                    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1960
1961                         struct proc *p = current_proc();
1962
1963 #if NFSDIAG
1964                         if (rep->r_flags & R_SENT && nfsprnttimo &&
1965                             nmp->nm_timeouts >= nfsprnttimo) {
1966                                 int t = proct[rep->r_procnum];
1967                                 if (t)
1968                                         NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1]));
1969                                 else
1970                                         NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum));
1971                         }
1972                         nfsdup(rep);
1973 #endif /* NFSDIAG */
1974                         /*
1975                          * Iff first send, start timing
1976                          * else turn timing off, backoff timer
1977                          * and divide congestion window by 2.
1978                          * We update these *before* the send to avoid
1979                          * racing against receiving the reply.
1980                          * We save them so we can restore them on send error.
1981                          */
1982                         flags = rep->r_flags;
1983                         rexmit = rep->r_rexmit;
1984                         cwnd = nmp->nm_cwnd;
1985                         sent = nmp->nm_sent;
1986                         xid = rep->r_xid;
1987                         if (rep->r_flags & R_SENT) {
1988                                 rep->r_flags &= ~R_TIMING;
1989                                 if (++rep->r_rexmit > NFS_MAXREXMIT)
1990                                         rep->r_rexmit = NFS_MAXREXMIT;
1991                                 nmp->nm_cwnd >>= 1;
1992                                 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1993                                         nmp->nm_cwnd = NFS_CWNDSCALE;
1994                                 nfsstats.rpcretries++;
1995                         } else {
1996                                 rep->r_flags |= R_SENT;
1997                                 nmp->nm_sent += NFS_CWNDSCALE;
1998                         }
1999                         FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
2000
                             /* The send itself is done under the network funnel. */
2001                         thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2002
                             /* With NFSMNT_NOCONN the destination address is passed explicitly. */
2003                         if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
2004                             error = (*so->so_proto->pr_usrreqs->pru_send)
2005                                 (so, 0, m, 0, 0, p);
2006                         else
2007                             error = (*so->so_proto->pr_usrreqs->pru_send)
2008                                 (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p);
2009
2010                         thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2011
2012                         FSDBG(535, xid, error, sent, cwnd);
2013
                             /*
                              * On a send error, put back the state saved above and
                              * remember the failure in R_RESENDERR; r_rtt stays -1.
                              * On success, restart the round trip count at 0.
                              */
2014                         if (error) {
2015                                 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
2016                                         so->so_error = 0;
2017                                 rep->r_flags  = flags | R_RESENDERR;
2018                                 rep->r_rexmit = rexmit;
2019                                 nmp->nm_cwnd = cwnd;
2020                                 nmp->nm_sent = sent;
2021                                 if (flags & R_SENT)
2022                                         nfsstats.rpcretries--;
2023                         } else
2024                                 rep->r_rtt = 0;
2025                 }
2026         }
2027 #ifndef NFS_NOSERVER
2028         /*
2029          * Call the nqnfs server timer once a second to handle leases.
2030          */
2031         microuptime(&now);
2032         if (lasttime != now.tv_sec) {
2033                 lasttime = now.tv_sec;
2034                 nqnfs_serverd();
2035         }
2036
2037         /*
2038          * Scan the write gathering queues for writes that need to be
2039          * completed now.
2040          */
2041         cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
2042         TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
2043             if (LIST_FIRST(&slp->ns_tq) &&
2044                 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
2045                 nfsrv_wakenfsd(slp);
2046         }
2047 #endif /* NFS_NOSERVER */
2048         splx(s);
             /* Re-arm the timer. */
2049         timeout(nfs_timer_funnel, (void *)0, nfs_ticks);
2050
2051 }
2052
2053
2054 /*
2055  * Test for a termination condition pending on the process.
2056  * This is used to determine if we need to bail on a mount.
2057  * EIO is returned if there has been a soft timeout.
2058  * EINTR is returned if there is a signal pending that is not being ignored
2059  * and the mount is interruptable, or if we are a thread that is in the process
2060  * of cancellation (also SIGKILL posted).
2061  */
2062 int
2063 nfs_sigintr(nmp, rep, p)
2064         struct nfsmount *nmp;
2065         struct nfsreq *rep;
2066         struct proc *p;
2067 {
2068         struct uthread *curr_td;
2069         sigset_t pending_sigs;
2070         int context_good = 0;
2071         struct nfsmount *repnmp;
2072
2073         if (nmp == NULL)
2074                 return (ENXIO);
2075         if (rep != NULL) {
2076                 repnmp = rep->r_nmp;
2077                 /* we've had a forced unmount. */
2078                 if (repnmp == NULL)
2079                         return (ENXIO);
2080                 /* request has timed out on a 'soft' mount. */
2081                 if (rep->r_flags & R_SOFTTERM)
2082                         return (EIO);
2083                 /*
2084                  * We're in the progress of a force unmount and there's
2085                  * been a timeout we're dead and fail IO.
2086                  */
2087                 if ((repnmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
2088                    (NFSSTA_FORCE|NFSSTA_TIMEO))
2089                         return (EIO);
2090                 /* Someone is unmounting us, go soft and mark it. */
2091                 if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
2092                         repnmp->nm_flag |= NFSMNT_SOFT;
2093                         nmp->nm_state |= NFSSTA_FORCE;
2094                 }
2095                 /*
2096                  * If the mount is hung and we've requested not to hang
2097                  * on remote filesystems, then bail now.
2098                  */
2099                 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 &&
2100                     (repnmp->nm_state & NFSSTA_TIMEO) != 0)
2101                         return (EIO);
2102         }
2103         /* XXX: is this valid?  this probably should be an assertion. */
2104         if (p == NULL)
2105                 return (0);
2106
2107         /*
2108          * XXX: Since nfs doesn't have a good shot at getting the current
2109          * thread we take a guess.  (only struct proc * are passed to VOPs)
2110          * What we do is look at the current thread, if it belongs to the
2111          * passed in proc pointer then we have a "good/accurate" context
2112          * and can make an accurate guess as to what to do.
2113          * However if we have a bad context we have to make due with what
2114          * is in the proc struct which may not be as up to date as we'd
2115          * like.
2116          * This is ok because the process will call us with the correct
2117          * context after a short timeout while waiting for a response.
2118          */
2119         curr_td = (struct uthread *)get_bsdthread_info(current_act());
2120         if (curr_td->uu_proc == p)
2121                 context_good = 1;
2122         if (context_good && current_thread_aborted())
2123                 return (EINTR);
2124         /* mask off thread and process blocked signals. */
2125         if (context_good)
2126                 pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask;
2127         else
2128                 pending_sigs = p->p_siglist;
2129         /* mask off process level and NFS ignored signals. */
2130         pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK;
2131         if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0)
2132                 return (EINTR);
2133         return (0);
2134 }
2135
2136 /*
2137  * Lock a socket against others.
2138  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
2139  * and also to avoid race conditions between the processes with nfs requests
2140  * in progress when a reconnect is necessary.
2141  */
2142 int
2143 nfs_sndlock(rep)
2144         struct nfsreq *rep;
2145 {
2146         register int *statep;
2147         struct proc *p;
2148         int error, slpflag = 0, slptimeo = 0;
2149
2150         if (rep->r_nmp == NULL)
2151                 return (ENXIO);
2152         statep = &rep->r_nmp->nm_state;
2153
2154         p = rep->r_procp;
2155         if (rep->r_nmp->nm_flag & NFSMNT_INT)
2156                 slpflag = PCATCH;
2157         while (*statep & NFSSTA_SNDLOCK) {
2158                 error = nfs_sigintr(rep->r_nmp, rep, p);
2159                 if (error)
2160                         return (error);
2161                 *statep |= NFSSTA_WANTSND;
2162                 if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0)
2163                         slptimeo = hz;
2164                 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2165                         "nfsndlck", slptimeo);
2166                 if (slpflag == PCATCH) {
2167                         slpflag = 0;
2168                         slptimeo = 2 * hz;
2169                 }
2170                 /*
2171                  * Make sure while we slept that the mountpoint didn't go away.
2172                  * nfs_sigintr and callers expect it in tact.
2173                  */
2174                 if (!rep->r_nmp)
2175                         return (ENXIO); /* don't have lock until out of loop */
2176         }
2177         *statep |= NFSSTA_SNDLOCK;
2178         return (0);
2179 }
2180
2181 /*
2182  * Unlock the stream socket for others.
2183  */
2184 void
2185 nfs_sndunlock(rep)
2186         struct nfsreq *rep;
2187 {
2188         register int *statep;
2189
2190         if (rep->r_nmp == NULL)
2191                 return;
2192         statep = &rep->r_nmp->nm_state;
2193         if ((*statep & NFSSTA_SNDLOCK) == 0)
2194                 panic("nfs sndunlock");
2195         *statep &= ~NFSSTA_SNDLOCK;
2196         if (*statep & NFSSTA_WANTSND) {
2197                 *statep &= ~NFSSTA_WANTSND;
2198                 wakeup((caddr_t)statep);
2199         }
2200 }
2201
2202 static int
2203 nfs_rcvlock(rep)
2204         register struct nfsreq *rep;
2205 {
2206         register int *statep;
2207         int error, slpflag, slptimeo = 0;
2208
2209         /* make sure we still have our mountpoint */
2210         if (!rep->r_nmp) {
2211                 if (rep->r_mrep != NULL)
2212                         return (EALREADY);
2213                 return (ENXIO);
2214         }
2215
2216         statep = &rep->r_nmp->nm_state;
2217         FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *statep);
2218         if (rep->r_nmp->nm_flag & NFSMNT_INT)
2219                 slpflag = PCATCH;
2220         else
2221                 slpflag = 0;
2222         while (*statep & NFSSTA_RCVLOCK) {
2223                 if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) {
2224                         FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
2225                         return (error);
2226                 } else if (rep->r_mrep != NULL) {
2227                         /*
2228                          * Don't bother sleeping if reply already arrived
2229                          */
2230                         FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
2231                         return (EALREADY);
2232                 }
2233                 FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
2234                 *statep |= NFSSTA_WANTRCV;
2235                 /*
2236                  * We need to poll if we're P_NOREMOTEHANG so that we
2237                  * call nfs_sigintr periodically above.
2238                  */
2239                 if (rep->r_procp != NULL &&
2240                     (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0)
2241                         slptimeo = hz;
2242                 (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
2243                               "nfsrcvlk", slptimeo);
2244                 if (slpflag == PCATCH) {
2245                         slpflag = 0;
2246                         slptimeo = 2 * hz;
2247                 }
2248                 /*
2249                  * Make sure while we slept that the mountpoint didn't go away.
2250                  * nfs_sigintr and caller nfs_reply expect it intact.
2251                  */
2252                 if (!rep->r_nmp)  {
2253                         FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
2254                         return (ENXIO); /* don't have lock until out of loop */
2255                 }
2256         }
2257         /*
2258          * nfs_reply will handle it if reply already arrived.
2259          * (We may have slept or been preempted while on network funnel).
2260          */
2261         FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep);
2262         *statep |= NFSSTA_RCVLOCK;
2263         return (0);
2264 }
2265
2266 /*
2267  * Unlock the stream socket for others.
2268  */
2269 static void
2270 nfs_rcvunlock(rep)
2271         register struct nfsreq *rep;
2272 {
2273         register int *statep;
2274
2275         if (rep->r_nmp == NULL)
2276                 return;
2277         statep = &rep->r_nmp->nm_state;
2278
2279         FSDBG(533, statep, *statep, 0, 0);
2280         if ((*statep & NFSSTA_RCVLOCK) == 0)
2281                 panic("nfs rcvunlock");
2282         *statep &= ~NFSSTA_RCVLOCK;
2283         if (*statep & NFSSTA_WANTRCV) {
2284                 *statep &= ~NFSSTA_WANTRCV;
2285                 wakeup((caddr_t)statep);
2286         }
2287 }
2288
2289
2290 #ifndef NFS_NOSERVER
2291 /*
2292  * Socket upcall routine for the nfsd sockets.
2293  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2294  * Essentially do as much as possible non-blocking, else punt and it will
2295  * be called with M_WAIT from an nfsd.
2296  */
2297  /*
2298  * Needs to run under network funnel
2299  */
2300 void
2301 nfsrv_rcv(so, arg, waitflag)
2302         struct socket *so;
2303         caddr_t arg;
2304         int waitflag;
2305 {
2306         register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2307         register struct mbuf *m;
2308         struct mbuf *mp, *mhck;
2309         struct sockaddr *nam=0;
2310         struct uio auio;
2311         int flags, ns_nflag=0, error;
2312         struct sockaddr_in  *sin;
2313
2314         if ((slp->ns_flag & SLP_VALID) == 0)
2315                 return;
2316 #ifdef notdef
2317         /*
2318          * Define this to test for nfsds handling this under heavy load.
2319          */
2320         if (waitflag == M_DONTWAIT) {
2321                 ns_nflag = SLPN_NEEDQ;
2322                 goto dorecs;
2323         }
2324 #endif
2325         auio.uio_procp = NULL;
2326         if (so->so_type == SOCK_STREAM) {
2327                 /*
2328                  * If there are already records on the queue, defer soreceive()
2329                  * to an nfsd so that there is feedback to the TCP layer that
2330                  * the nfs servers are heavily loaded.
2331                  */
2332                 if (slp->ns_rec && waitflag == M_DONTWAIT) {
2333                         ns_nflag = SLPN_NEEDQ;
2334                         goto dorecs;
2335                 }
2336
2337                 /*
2338                  * Do soreceive().
2339                  */
2340                 auio.uio_resid = 1000000000;
2341                 flags = MSG_DONTWAIT;
2342                 error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
2343                 if (error || mp == (struct mbuf *)0) {
2344                         if (error == EWOULDBLOCK)
2345                                 ns_nflag = SLPN_NEEDQ;
2346                         else
2347                                 ns_nflag = SLPN_DISCONN;
2348                         goto dorecs;
2349                 }
2350                 m = mp;
2351                 if (slp->ns_rawend) {
2352                         slp->ns_rawend->m_next = m;
2353                         slp->ns_cc += 1000000000 - auio.uio_resid;
2354                 } else {
2355                         slp->ns_raw = m;
2356                         slp->ns_cc = 1000000000 - auio.uio_resid;
2357                 }
2358                 while (m->m_next)
2359                         m = m->m_next;
2360                 slp->ns_rawend = m;
2361
2362                 /*
2363                  * Now try and parse record(s) out of the raw stream data.
2364                  */
2365                 error = nfsrv_getstream(slp, waitflag);
2366                 if (error) {
2367                         if (error == EPERM)
2368                                 ns_nflag = SLPN_DISCONN;
2369                         else
2370                                 ns_nflag = SLPN_NEEDQ;
2371                 }
2372         } else {
2373                 do {
2374                         auio.uio_resid = 1000000000;
2375                         flags = MSG_DONTWAIT;
2376                         nam = 0;
2377                         error = soreceive(so, &nam, &auio, &mp,
2378                                                 (struct mbuf **)0, &flags);
2379
2380                         if (mp) {
2381                                 if (nam) {
2382                                         MGET(mhck, M_WAIT, MT_SONAME);
2383                                         mhck->m_len = nam->sa_len;
2384                                         sin = mtod(mhck, struct sockaddr_in *);
2385                                         bcopy(nam, sin, sizeof(struct sockaddr_in));
2386                                         mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
2387                                         FREE(nam, M_SONAME);
2388
2389                                         m = mhck;
2390                                         m->m_next = mp;
2391                                 } else
2392                                         m = mp;
2393                                 if (slp->ns_recend)
2394                                         slp->ns_recend->m_nextpkt = m;
2395                                 else
2396                                         slp->ns_rec = m;
2397                                 slp->ns_recend = m;
2398                                 m->m_nextpkt = (struct mbuf *)0;
2399                         }
2400                         if (error) {
2401                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2402                                         && error != EWOULDBLOCK) {
2403                                         ns_nflag = SLPN_DISCONN;
2404                                         goto dorecs;
2405                                 }
2406                         }
2407                 } while (mp);
2408         }
2409
2410         /*
2411          * Now try and process the request records, non-blocking.
2412          */
2413 dorecs:
2414         if (ns_nflag)
2415                 slp->ns_nflag |= ns_nflag;
2416         if (waitflag == M_DONTWAIT &&
2417                 (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) {
2418                 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
2419                 nfsrv_wakenfsd(slp);
2420                 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
2421         }
2422 }
2423
2424 /*
2425  * Try and extract an RPC request from the mbuf data list received on a
2426  * stream socket. The "waitflag" argument indicates whether or not it
2427  * can sleep.
2428  */
2429 static int
2430 nfsrv_getstream(slp, waitflag)
2431         register struct nfssvc_sock *slp;
2432         int waitflag;
2433 {
2434         register struct mbuf *m, **mpp;
2435         register char *cp1, *cp2;
2436         register int len;
2437         struct mbuf *om, *m2, *recm;
2438         u_long recmark;
2439
2440         if (slp->ns_nflag & SLPN_GETSTREAM)
2441                 panic("nfs getstream");
2442         slp->ns_nflag |= SLPN_GETSTREAM;
2443         for (;;) {
2444             if (slp->ns_reclen == 0) {
2445                 if (slp->ns_cc < NFSX_UNSIGNED) {
2446                         slp->ns_nflag &= ~SLPN_GETSTREAM;
2447                         return (0);
2448                 }
2449                 m = slp->ns_raw;
2450                 if (m->m_len >= NFSX_UNSIGNED) {
2451                         bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
2452                         m->m_data += NFSX_UNSIGNED;
2453                         m->m_len -= NFSX_UNSIGNED;
2454                 } else {
2455                         cp1 = (caddr_t)&recmark;
2456                         cp2 = mtod(m, caddr_t);
2457                         while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2458                                 while (m->m_len == 0) {
2459                                         m = m->m_next;
2460                                         cp2 = mtod(m, caddr_t);
2461                                 }
2462                                 *cp1++ = *cp2++;
2463                                 m->m_data++;
2464                                 m->m_len--;
2465                         }
2466                 }
2467                 slp->ns_cc -= NFSX_UNSIGNED;
2468                 recmark = ntohl(recmark);
2469                 slp->ns_reclen = recmark & ~0x80000000;
2470                 if (recmark & 0x80000000)
2471                         slp->ns_nflag |= SLPN_LASTFRAG;
2472                 else
2473                         slp->ns_nflag &= ~SLPN_LASTFRAG;
2474                 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
2475                         slp->ns_nflag &= ~SLPN_GETSTREAM;
2476                         return (EPERM);
2477                 }
2478             }
2479
2480             /*
2481              * Now get the record part.
2482              *
2483              * Note that slp->ns_reclen may be 0.  Linux sometimes
2484              * generates 0-length RPCs
2485              */
2486             recm = NULL;
2487             if (slp->ns_cc == slp->ns_reclen) {
2488                 recm = slp->ns_raw;
2489                 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2490                 slp->ns_cc = slp->ns_reclen = 0;
2491             } else if (slp->ns_cc > slp->ns_reclen) {
2492                 len = 0;
2493                 m = slp->ns_raw;
2494                 om = (struct mbuf *)0;
2495                 while (len < slp->ns_reclen) {
2496                         if ((len + m->m_len) > slp->ns_reclen) {
2497                                 m2 = m_copym(m, 0, slp->ns_reclen - len,
2498                                         waitflag);
2499                                 if (m2) {
2500                                         if (om) {
2501                                                 om->m_next = m2;
2502                                                 recm = slp->ns_raw;
2503                                         } else
2504                                                 recm = m2;
2505                                         m->m_data += slp->ns_reclen - len;
2506                                         m->m_len -= slp->ns_reclen - len;
2507                                         len = slp->ns_reclen;
2508                                 } else {
2509                                         slp->ns_nflag &= ~SLPN_GETSTREAM;
2510                                         return (EWOULDBLOCK);
2511                                 }
2512                         } else if ((len + m->m_len) == slp->ns_reclen) {
2513                                 om = m;
2514                                 len += m->m_len;
2515                                 m = m->m_next;
2516                                 recm = slp->ns_raw;
2517                                 om->m_next = (struct mbuf *)0;
2518                         } else {
2519                                 om = m;
2520                                 len += m->m_len;
2521                                 m = m->m_next;
2522                         }
2523                 }
2524                 slp->ns_raw = m;
2525                 slp->ns_cc -= len;
2526                 slp->ns_reclen = 0;
2527             } else {
2528                 slp->ns_nflag &= ~SLPN_GETSTREAM;
2529                 return (0);
2530             }
2531
2532             /*
2533              * Accumulate the fragments into a record.
2534              */
2535             mpp = &slp->ns_frag;
2536             while (*mpp)
2537                 mpp = &((*mpp)->m_next);
2538             *mpp = recm;
2539             if (slp->ns_nflag & SLPN_LASTFRAG) {
2540                 if (slp->ns_recend)
2541                     slp->ns_recend->m_nextpkt = slp->ns_frag;
2542                 else
2543                     slp->ns_rec = slp->ns_frag;
2544                 slp->ns_recend = slp->ns_frag;
2545                 slp->ns_frag = (struct mbuf *)0;
2546             }
2547         }
2548 }
2549
2550 /*
2551  * Parse an RPC header.
2552  */
2553 int
2554 nfsrv_dorec(slp, nfsd, ndp)
2555         register struct nfssvc_sock *slp;
2556         struct nfsd *nfsd;
2557         struct nfsrv_descript **ndp;
2558 {
2559         register struct mbuf *m;
2560         register struct mbuf *nam;
2561         register struct nfsrv_descript *nd;
2562         int error;
2563
2564         *ndp = NULL;
2565         if ((slp->ns_flag & SLP_VALID) == 0 ||
2566             (m = slp->ns_rec) == (struct mbuf *)0)
2567                 return (ENOBUFS);
2568         slp->ns_rec = m->m_nextpkt;
2569         if (slp->ns_rec)
2570                 m->m_nextpkt = (struct mbuf *)0;
2571         else
2572                 slp->ns_recend = (struct mbuf *)0;
2573         if (m->m_type == MT_SONAME) {
2574                 nam = m;
2575                 m = m->m_next;
2576                 nam->m_next = NULL;
2577         } else
2578                 nam = NULL;
2579         MALLOC_ZONE(nd, struct nfsrv_descript *,
2580                         sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2581         nd->nd_md = nd->nd_mrep = m;
2582         nd->nd_nam2 = nam;
2583         nd->nd_dpos = mtod(m, caddr_t);
2584         error = nfs_getreq(nd, nfsd, TRUE);
2585         if (error) {
2586                 if (nam)
2587                         m_freem(nam);
2588                 FREE_ZONE((caddr_t)nd,  sizeof *nd, M_NFSRVDESC);
2589                 return (error);
2590         }
2591         *ndp = nd;
2592         nfsd->nfsd_nd = nd;
2593         return (0);
2594 }
2595
2596 /*
2597  * Parse an RPC request
2598  * - verify it
2599  * - fill in the cred struct.
2600  */
2601 int
2602 nfs_getreq(nd, nfsd, has_header)
2603         register struct nfsrv_descript *nd;
2604         struct nfsd *nfsd;
2605         int has_header;
2606 {
2607         register int len, i;
2608         register u_long *tl;
2609         register long t1;
2610         struct uio uio;
2611         struct iovec iov;
2612         caddr_t dpos, cp2, cp;
2613         u_long nfsvers, auth_type;
2614         uid_t nickuid;
2615         int error = 0, nqnfs = 0, ticklen;
2616         struct mbuf *mrep, *md;
2617         register struct nfsuid *nuidp;
2618         struct timeval tvin, tvout, now;
2619 #if 0                           /* until encrypted keys are implemented */
2620         NFSKERBKEYSCHED_T keys; /* stores key schedule */
2621 #endif
2622
2623         mrep = nd->nd_mrep;
2624         md = nd->nd_md;
2625         dpos = nd->nd_dpos;
2626         if (has_header) {
2627                 nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
2628                 nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
2629                 if (*tl++ != rpc_call) {
2630                         m_freem(mrep);
2631                         return (EBADRPC);
2632                 }
2633         } else
2634                 nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
2635         nd->nd_repstat = 0;
2636         nd->nd_flag = 0;
2637         if (*tl++ != rpc_vers) {
2638                 nd->nd_repstat = ERPCMISMATCH;
2639                 nd->nd_procnum = NFSPROC_NOOP;
2640                 return (0);
2641         }
2642         if (*tl != nfs_prog) {
2643                 if (*tl == nqnfs_prog)
2644                         nqnfs++;
2645                 else {
2646                         nd->nd_repstat = EPROGUNAVAIL;
2647                         nd->nd_procnum = NFSPROC_NOOP;
2648                         return (0);
2649                 }
2650         }
2651         tl++;
2652         nfsvers = fxdr_unsigned(u_long, *tl++);
2653         if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
2654                 (nfsvers != NQNFS_VER3 && nqnfs)) {
2655                 nd->nd_repstat = EPROGMISMATCH;
2656                 nd->nd_procnum = NFSPROC_NOOP;
2657                 return (0);
2658         }
2659         if (nqnfs)
2660                 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
2661         else if (nfsvers == NFS_VER3)
2662                 nd->nd_flag = ND_NFSV3;
2663         nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
2664         if (nd->nd_procnum == NFSPROC_NULL)
2665                 return (0);
2666         if (nd->nd_procnum >= NFS_NPROCS ||
2667                 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2668                 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2669                 nd->nd_repstat = EPROCUNAVAIL;
2670                 nd->nd_procnum = NFSPROC_NOOP;
2671                 return (0);
2672         }
2673         if ((nd->nd_flag & ND_NFSV3) == 0)
2674                 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2675         auth_type = *tl++;
2676         len = fxdr_unsigned(int, *tl++);
2677         if (len < 0 || len > RPCAUTH_MAXSIZ) {
2678                 m_freem(mrep);
2679                 return (EBADRPC);
2680         }
2681
2682         nd->nd_flag &= ~ND_KERBAUTH;
2683         /*
2684          * Handle auth_unix or auth_kerb.
2685          */
2686         if (auth_type == rpc_auth_unix) {
2687                 len = fxdr_unsigned(int, *++tl);
2688                 if (len < 0 || len > NFS_MAXNAMLEN) {
2689                         m_freem(mrep);
2690                         return (EBADRPC);
2691                 }
2692                 nfsm_adv(nfsm_rndup(len));
2693                 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2694                 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2695                 nd->nd_cr.cr_ref = 1;
2696                 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2697                 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2698                 len = fxdr_unsigned(int, *tl);
2699                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
2700                         m_freem(mrep);
2701                         return (EBADRPC);
2702                 }
2703                 nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
2704                 for (i = 1; i <= len; i++)
2705                     if (i < NGROUPS)
2706                         nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2707                     else
2708                         tl++;
2709                 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2710                 if (nd->nd_cr.cr_ngroups > 1)
2711                     nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
2712                 len = fxdr_unsigned(int, *++tl);
2713                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2714                         m_freem(mrep);
2715                         return (EBADRPC);
2716                 }
2717                 if (len > 0)
2718                         nfsm_adv(nfsm_rndup(len));
2719         } else if (auth_type == rpc_auth_kerb) {
2720                 switch (fxdr_unsigned(int, *tl++)) {
2721                 case RPCAKN_FULLNAME:
2722                         ticklen = fxdr_unsigned(int, *tl);
2723                         *((u_long *)nfsd->nfsd_authstr) = *tl;
2724                         uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2725                         nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2726                         if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
2727                                 m_freem(mrep);
2728                                 return (EBADRPC);
2729                         }
2730                         uio.uio_offset = 0;
2731                         uio.uio_iov = &iov;
2732                         uio.uio_iovcnt = 1;
2733                         uio.uio_segflg = UIO_SYSSPACE;
2734                         iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2735                         iov.iov_len = RPCAUTH_MAXSIZ - 4;
2736                         nfsm_mtouio(&uio, uio.uio_resid);
2737                         nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2738                         if (*tl++ != rpc_auth_kerb ||
2739                                 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2740                                 printf("Bad kerb verifier\n");
2741                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2742                                 nd->nd_procnum = NFSPROC_NOOP;
2743                                 return (0);
2744                         }
2745                         nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2746                         tl = (u_long *)cp;
2747                         if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2748                                 printf("Not fullname kerb verifier\n");
2749                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2750                                 nd->nd_procnum = NFSPROC_NOOP;
2751                                 return (0);
2752                         }
2753                         cp += NFSX_UNSIGNED;
2754                         bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2755                         nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2756                         nd->nd_flag |= ND_KERBFULL;
2757                         nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2758                         break;
2759                 case RPCAKN_NICKNAME:
2760                         if (len != 2 * NFSX_UNSIGNED) {
2761                                 printf("Kerb nickname short\n");
2762                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2763                                 nd->nd_procnum = NFSPROC_NOOP;
2764                                 return (0);
2765                         }
2766                         nickuid = fxdr_unsigned(uid_t, *tl);
2767                         nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2768                         if (*tl++ != rpc_auth_kerb ||
2769                                 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2770                                 printf("Kerb nick verifier bad\n");
2771                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2772                                 nd->nd_procnum = NFSPROC_NOOP;
2773                                 return (0);
2774                         }
2775                         nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2776                         tvin.tv_sec = *tl++;
2777                         tvin.tv_usec = *tl;
2778
2779                         for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2780                             nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2781                                 if (nuidp->nu_cr.cr_uid == nickuid &&
2782                                     (!nd->nd_nam2 ||
2783                                      netaddr_match(NU_NETFAM(nuidp),
2784                                       &nuidp->nu_haddr, nd->nd_nam2)))
2785                                         break;
2786                         }
2787                         if (!nuidp) {
2788                                 nd->nd_repstat =
2789                                         (NFSERR_AUTHERR|AUTH_REJECTCRED);
2790                                 nd->nd_procnum = NFSPROC_NOOP;
2791                                 return (0);
2792                         }
2793
2794                         /*
2795                          * Now, decrypt the timestamp using the session key
2796                          * and validate it.
2797                          */
2798 #if NFSKERB
2799                         XXX
2800 #endif
2801
2802                         tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2803                         tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2804                         microtime(&now);
2805                         if (nuidp->nu_expire < now.tv_sec ||
2806                             nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2807                             (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2808                              nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2809                                 nuidp->nu_expire = 0;
2810                                 nd->nd_repstat =
2811                                     (NFSERR_AUTHERR|AUTH_REJECTVERF);
2812                                 nd->nd_procnum = NFSPROC_NOOP;
2813                                 return (0);
2814                         }
2815                         nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2816                         nd->nd_flag |= ND_KERBNICK;
2817                 };
2818         } else {
2819                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2820                 nd->nd_procnum = NFSPROC_NOOP;
2821                 return (0);
2822         }
2823
2824         /*
2825          * For nqnfs, get piggybacked lease request.
2826          */
2827         if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
2828                 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2829                 nd->nd_flag |= fxdr_unsigned(int, *tl);
2830                 if (nd->nd_flag & ND_LEASE) {
2831                         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2832                         nd->nd_duration = fxdr_unsigned(int, *tl);
2833                 } else
2834                         nd->nd_duration = NQ_MINLEASE;
2835         } else
2836                 nd->nd_duration = NQ_MINLEASE;
2837         nd->nd_md = md;
2838         nd->nd_dpos = dpos;
2839         return (0);
2840 nfsmout:
2841         return (error);
2842 }
2843
2844 /*
2845  * Search for a sleeping nfsd and wake it up.
2846  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2847  * running nfsds will go look for the work in the nfssvc_sock list.
2848  */
2849 void
2850 nfsrv_wakenfsd(slp)
2851         struct nfssvc_sock *slp;
2852 {
2853         register struct nfsd *nd;
2854
2855         if ((slp->ns_flag & SLP_VALID) == 0)
2856                 return;
2857         TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
2858                 if (nd->nfsd_flag & NFSD_WAITING) {
2859                         nd->nfsd_flag &= ~NFSD_WAITING;
2860                         if (nd->nfsd_slp)
2861                                 panic("nfsd wakeup");
2862                         slp->ns_sref++;
2863                         nd->nfsd_slp = slp;
2864                         wakeup((caddr_t)nd);
2865                         return;
2866                 }
2867         }
2868         slp->ns_flag |= SLP_DOREC;
2869         nfsd_head_flag |= NFSD_CHECKSLP;
2870 }
2871 #endif /* NFS_NOSERVER */
2872
2873 static int
2874 nfs_msg(p, server, msg, error)
2875         struct proc *p;
2876         const char *server, *msg;
2877         int error;
2878 {
2879         tpr_t tpr;
2880
2881         if (p)
2882                 tpr = tprintf_open(p);
2883         else
2884                 tpr = NULL;
2885         if (error)
2886                 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg,
2887                     error);
2888         else
2889                 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2890         tprintf_close(tpr);
2891         return (0);
2892 }
2893
2894 static void
2895 nfs_down(rep, msg, error)
2896         struct nfsreq *rep;
2897         const char *msg;
2898         int error;
2899 {
2900         int dosignal;
2901
2902         if (rep == NULL || rep->r_nmp == NULL)
2903                 return;
2904         if (!(rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
2905                 vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
2906                     VQ_NOTRESP, 0);
2907                 rep->r_nmp->nm_state |= NFSSTA_TIMEO;
2908         }
2909         rep->r_flags |= R_TPRINTFMSG;
2910         nfs_msg(rep->r_procp, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname,
2911             msg, error);
2912 }
2913
2914 static void
2915 nfs_up(rep, msg, error)
2916         struct nfsreq *rep;
2917         const char *msg;
2918         int error;
2919 {
2920
2921         if (error != 0 || rep == NULL || rep->r_nmp == NULL)
2922                 return;
2923         if ((rep->r_flags & R_TPRINTFMSG) != 0)
2924                 nfs_msg(rep->r_procp,
2925                     rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
2926         if ((rep->r_nmp->nm_state & NFSSTA_TIMEO)) {
2927                 rep->r_nmp->nm_state &= ~NFSSTA_TIMEO;
2928                 vfs_event_signal(&rep->r_nmp->nm_mountp->mnt_stat.f_fsid,
2929                     VQ_NOTRESP, 1);
2930         }
2931 }