git.saurik.com Git - apple/xnu.git/commitdiff
xnu-201.19.tar.gz  (tags: mac-os-x-1013, v201.19)
author     Apple <opensource@apple.com>
           Sun, 10 Aug 2003 07:38:02 +0000 (07:38 +0000)
committer  Apple <opensource@apple.com>
           Sun, 10 Aug 2003 07:38:02 +0000 (07:38 +0000)
52 files changed:
bsd/conf/version.minor
bsd/hfs/hfs_vfsops.c
bsd/isofs/cd9660/cd9660_node.c
bsd/kern/bsd_init.c
bsd/kern/kern_descrip.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/ubc_subr.c
bsd/kern/uipc_mbuf.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_socket2.c
bsd/kern/uipc_usrreq.c
bsd/miscfs/specfs/spec_vnops.c
bsd/miscfs/synthfs/synthfs_vfsops.c
bsd/netat/adsp_stream.c
bsd/netinet/ip_output.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/nfs/nfs.h
bsd/nfs/nfs_bio.c
bsd/nfs/nfs_node.c
bsd/nfs/nfs_nqlease.c
bsd/nfs/nfs_serv.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_vfsops.c
bsd/nfs/nfs_vnops.c
bsd/nfs/nfsm_subs.h
bsd/nfs/nfsnode.h
bsd/sys/mbuf.h
bsd/ufs/ufs/ufs_inode.c
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
bsd/vfs/vfs_vnops.c
bsd/vm/vnode_pager.c
iokit/Drivers/platform/drvAppleRootDomain/RootDomain.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IONVRAM.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IORegistryEntry.cpp
iokit/Kernel/IOService.cpp
iokit/KernelConfigTables.cpp
iokit/conf/version.minor
iokit/conf/version.variant
libkern/conf/version.minor
libkern/ppc/OSAtomic.s
libsa/conf/version.minor
osfmk/conf/kernelversion.minor
osfmk/conf/version.minor
osfmk/default_pager/dp_backing_store.c
pexpert/conf/version.minor

index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index 001c0f600e383d9055157cb694ac996cf237f796..6329bd20785c9074e7ed85242b3624cdf8373ba7 100644 (file)
@@ -1246,6 +1246,8 @@ struct proc *p;
     struct hfsnode             *hp;
     struct hfsmount            *hfsmp = VFSTOHFS(mp);
     ExtendedVCB                        *vcb;
+    struct vnode               *meta_vp[3];
+    int i;
     int error, allerror = 0;
 
     DBG_FUNC_NAME("hfs_sync");
@@ -1285,7 +1287,8 @@ loop:;
         nvp = vp->v_mntvnodes.le_next;
         hp = VTOH(vp);
 
-        if ((vp->v_type == VNON) || (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
+        if ((vp->v_flag & VSYSTEM) || (vp->v_type == VNON) ||
+            (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
             (vp->v_dirtyblkhd.lh_first == NULL) && !(vp->v_flag & VHASDIRTY))) {
             simple_unlock(&vp->v_interlock);
            simple_unlock(&mntvnode_slock);
@@ -1315,30 +1318,31 @@ loop:;
         simple_lock(&mntvnode_slock);
     };
 
-       vcb = HFSTOVCB(hfsmp);
+    vcb = HFSTOVCB(hfsmp);
+    meta_vp[0] = vcb->extentsRefNum;
+    meta_vp[1] = vcb->catalogRefNum;
+    meta_vp[2] = vcb->allocationsRefNum;  /* This is NULL for standard HFS */
+
+    /* Now sync our three metadata files */
+    for (i = 0; i < 3; ++i) {
+       struct vnode *btvp;
+  
+        btvp = meta_vp[i];
 
-    /* Now reprocess the BTree node, stored above */
-    {
-    struct vnode               *btvp;
-        /*
-         * If the vnode that we are about to sync is no longer
-         * associated with this mount point, start over.
-         */
-        btvp = vcb->extentsRefNum;
         if ((btvp==0) || (btvp->v_type == VNON) || (btvp->v_mount != mp))
-            goto skipBtree;
+            continue;
         simple_lock(&btvp->v_interlock);
         hp = VTOH(btvp);
         if (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
             (btvp->v_dirtyblkhd.lh_first == NULL) && !(btvp->v_flag & VHASDIRTY)) {
             simple_unlock(&btvp->v_interlock);
-            goto skipBtree;
+            continue;
         }
         simple_unlock(&mntvnode_slock);
         error = vget(btvp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
         if (error) {
             simple_lock(&mntvnode_slock);
-            goto skipBtree;
+            continue;
         }
         if ((error = VOP_FSYNC(btvp, cred, waitfor, p)))
             allerror = error;
@@ -1347,15 +1351,15 @@ loop:;
         simple_lock(&mntvnode_slock);
     };
 
-skipBtree:;
-
     simple_unlock(&mntvnode_slock);
 
     /*
      * Force stale file system control information to be flushed.
      */
-    if ((error = VOP_FSYNC(hfsmp->hfs_devvp, cred, waitfor, p)))
-        allerror = error;
+    if (vcb->vcbSigWord == kHFSSigWord) {
+        if ((error = VOP_FSYNC(hfsmp->hfs_devvp, cred, waitfor, p)))
+            allerror = error;
+    }
     /*
      * Write back modified superblock.
      */
index c9400cd784abfc55b8282887e28a2e35648a12b9..f9e201a3210f1c9d6191f335d0702be89d4996a7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -306,8 +306,9 @@ cd9660_reclaim(ap)
         */
        cache_purge(vp);
        if (ip->i_devvp) {
-               vrele(ip->i_devvp);
-               ip->i_devvp = 0;
+               struct vnode *tvp = ip->i_devvp;
+               ip->i_devvp = NULL;
+               vrele(tvp);
        }
        if (ip->i_namep != isonullname)
                FREE(ip->i_namep, M_TEMP);
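
The cd9660_reclaim() change above is the first of several in this commit that reorder a vnode release: the owning field is cleared before vrele() is called, so nothing can chase a stale pointer while the reference is being dropped (kern_descrip.c, kern_exec.c, kern_exit.c, uipc_usrreq.c and spec_vnops.c below get the same treatment). A minimal stand-alone sketch of the pattern follows; the holder structure and function name are hypothetical, while struct vnode and vrele() are the real interfaces touched by the diff.

#include <sys/param.h>
#include <sys/vnode.h>

/* hypothetical holder of a cached vnode reference */
struct vref_holder {
        struct vnode *vh_vp;
};

/*
 * Detach the pointer first, then drop the reference, so code that runs
 * while vrele() sleeps or recycles the vnode cannot follow a stale
 * pointer through vh_vp.
 */
static void
vref_holder_release(struct vref_holder *h)
{
        struct vnode *tvp = h->vh_vp;

        if (tvp == NULL)
                return;
        h->vh_vp = NULL;        /* clear the field before releasing */
        vrele(tvp);             /* may sleep or trigger vnode reclaim */
}
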
index 1934f20f9923509504e83aae0ab0eaaf4c03f43f..6a055af65f58b5bf5fa71d6fcc816647ba78f319 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -496,8 +496,8 @@ bsd_init()
        /* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
        if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
                panic("bsd_init: cannot find root vnode");
-       filedesc0.fd_cdir = rootvnode;
        VREF(rootvnode);
+       filedesc0.fd_cdir = rootvnode;
        VOP_UNLOCK(rootvnode, 0, p);
        
 
index 7ede17b05fefee92e80d9e6d0c8eda8590117759..c592c4140223d59469fd273bfec5f06002ac4112 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  * SUCH DAMAGE.
  *
  *     @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
- *
- *     History:
- *             CHW     8/5/98    Added F_SETSIZE command to truncate without
- *                                     zero filling space 
- *             CHW     7/6/98    Updated Preallocate command to take a structure
- *                               and return output.
- *             CHW     6/25/98   Fixed a bug in the lock call in fcntl 
- *                               Preallocate command
  */
 
 #include <sys/param.h>
@@ -103,7 +95,6 @@ getdtablesize(p, uap, retval)
        void *uap;
        register_t *retval;
 {
-
        *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
        return (0);
 }
@@ -115,7 +106,6 @@ ogetdtablesize(p, uap, retval)
        void *uap;
        register_t *retval;
 {
-
        *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, NOFILE);
        return (0);
 }
@@ -200,8 +190,7 @@ dup2(p, uap, retval)
                        _fdrelse(fdp, i);
                        goto closeit;
                }
-       }
-       else {
+       } else {
                struct file **fpp;
                char flags;
 closeit:
@@ -214,7 +203,8 @@ closeit:
                if (*(fpp = &fdp->fd_ofiles[new])) {
                        struct file *fp = *fpp;
 
-                       *fpp = NULL; (void) closef(fp, p);
+                       *fpp = NULL;
+                       (void) closef(fp, p);
                }
        }
        return (finishdup(fdp, old, new, retval));
@@ -972,9 +962,9 @@ ffree(fp)
                fp->f_cred = NOCRED;
                crfree(cred);
        }
-#if 1 || DIAGNOSTIC
+
        fp->f_count = 0;
-#endif
+
        nfiles--;
        FREE_ZONE(fp, sizeof *fp, M_FILE);
 }
@@ -1062,8 +1052,7 @@ fdcopy(p)
                                *fpp = NULL;
                                *flags = 0;
                        }
-       }
-       else
+       } else
                (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
 
        return (newfdp);
@@ -1076,9 +1065,10 @@ void
 fdfree(p)
        struct proc *p;
 {
-       register struct filedesc *fdp;
-       register struct file **fpp;
-       register int i;
+       struct filedesc *fdp;
+       struct file **fpp;
+       int i;
+       struct vnode *tvp;
 
        if ((fdp = p->p_fd) == NULL)
                return;
@@ -1093,9 +1083,14 @@ fdfree(p)
                FREE_ZONE(fdp->fd_ofiles,
                                fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
        }
-       vrele(fdp->fd_cdir);
-       if (fdp->fd_rdir)
-               vrele(fdp->fd_rdir);
+       tvp = fdp->fd_cdir;
+       fdp->fd_cdir = NULL;
+       vrele(tvp);
+       if (fdp->fd_rdir) {
+               tvp = fdp->fd_rdir;
+               fdp->fd_rdir = NULL;
+               vrele(tvp);
+       }
        FREE_ZONE(fdp, sizeof *fdp, M_FILEDESC);
 }
 
index 7dd27b97cbecc7d4c71917223286b8b601abda24..88348db3ff6a14cfe7b34cbdd60984a0cc954502 100644 (file)
@@ -530,9 +530,10 @@ again:
                 * root set it.
                 */
                if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
-                       vrele(p->p_tracep);
+                       struct vnode *tvp = p->p_tracep;
                        p->p_tracep = NULL;
                        p->p_traceflag = 0;
+                       vrele(tvp);
                }
 #endif
                if (origvattr.va_mode & VSUID)
index 84c51a09c7dbb7db0435ce8c41eda14f8ebdf9a5..f588872ac6fbf1ab3319dda102d2650b419e526b 100644 (file)
@@ -267,6 +267,8 @@ proc_exit(struct proc *p)
                register struct session *sp = p->p_session;
 
                if (sp->s_ttyvp) {
+                       struct vnode *ttyvp;
+
                        /*
                         * Controlling process.
                         * Signal foreground pgrp,
@@ -284,9 +286,10 @@ proc_exit(struct proc *p)
                                if (sp->s_ttyvp)
                                        VOP_REVOKE(sp->s_ttyvp, REVOKEALL);
                        }
-                       if (sp->s_ttyvp)
-                               vrele(sp->s_ttyvp);
+                       ttyvp = sp->s_ttyvp;
                        sp->s_ttyvp = NULL;
+                       if (ttyvp)
+                               vrele(ttyvp);
                        /*
                         * s_ttyp is not zero'd; we use this to indicate
                         * that the session once had a controlling terminal.
@@ -303,8 +306,11 @@ proc_exit(struct proc *p)
         * release trace file
         */
        p->p_traceflag = 0;     /* don't trace the vrele() */
-       if (p->p_tracep)
-               vrele(p->p_tracep);
+       if (p->p_tracep) {
+               struct vnode *tvp = p->p_tracep;
+               p->p_tracep = NULL;
+               vrele(tvp);
+       }
 #endif
 
 
@@ -520,6 +526,7 @@ wait1(q, uap, retval, compat)
        register int nfound;
        register struct proc *p, *t;
        int status, error;
+       struct vnode *tvp;
 
 retry:
        if (uap->pid == 0)
@@ -610,8 +617,10 @@ loop:
                        /*
                         * Release reference to text vnode
                         */
-                       if (p->p_textvp)
-                               vrele(p->p_textvp);
+                       tvp = p->p_textvp;
+                       p->p_textvp = NULL;
+                       if (tvp)
+                               vrele(tvp);
 
                        /*
                         * Finally finished with old proc entry.
@@ -824,6 +833,8 @@ vproc_exit(struct proc *p)
                register struct session *sp = p->p_session;
 
                if (sp->s_ttyvp) {
+                       struct vnode *ttyvp;
+
                        /*
                         * Controlling process.
                         * Signal foreground pgrp,
@@ -841,9 +852,10 @@ vproc_exit(struct proc *p)
                                if (sp->s_ttyvp)
                                        VOP_REVOKE(sp->s_ttyvp, REVOKEALL);
                        }
-                       if (sp->s_ttyvp)
-                               vrele(sp->s_ttyvp);
+                       ttyvp = sp->s_ttyvp;
                        sp->s_ttyvp = NULL;
+                       if (ttyvp)
+                               vrele(ttyvp);
                        /*
                         * s_ttyp is not zero'd; we use this to indicate
                         * that the session once had a controlling terminal.
@@ -860,8 +872,11 @@ vproc_exit(struct proc *p)
         * release trace file
         */
        p->p_traceflag = 0;     /* don't trace the vrele() */
-       if (p->p_tracep)
-               vrele(p->p_tracep);
+       if (p->p_tracep) {
+               struct vnode *tvp = p->p_tracep;
+               p->p_tracep = NULL;
+               vrele(tvp);
+       }
 #endif
 
        q = p->p_children.lh_first;
index 32a5924cb79e2b8e58c87c98d5567b5aee32f0e9..c82fd94a2b5fe84f4bc16b7a0ddffea6e2a17d93 100644 (file)
@@ -322,13 +322,10 @@ ubc_setsize(struct vnode *vp, off_t nsize)
 
 /*
  * Get the size of the file
- * For local file systems the size is locally cached. For NFS
- * there might be a network transaction for this.
  */
 off_t
 ubc_getsize(struct vnode *vp)
 {
-       /* XXX deal with NFS */
        return (vp->v_ubcinfo->ui_size);
 }
 
index 7f7250d3cb8c9d9ce171dd2231546ce05fbb99c7..513cc8e5370ebc2f4f4e3efb56d243679e25e7e4 100644 (file)
@@ -121,6 +121,13 @@ mbinit()
        if (nclpp < 1) nclpp = 1;
        MBUF_LOCKINIT();
 //     NETISR_LOCKINIT();
+
+        mbstat.m_msize = MSIZE;
+        mbstat.m_mclbytes = MCLBYTES;
+        mbstat.m_minclsize = MINCLSIZE;
+        mbstat.m_mlen = MLEN;
+        mbstat.m_mhlen = MHLEN;
+
        if (nmbclusters == 0)
                nmbclusters = NMBCLUSTERS;
        MALLOC(mclrefcnt, short *, nmbclusters * sizeof (short),
@@ -330,6 +337,14 @@ m_retryhdr(canwait, type)
        if (m = m_retry(canwait, type)) {
                m->m_flags |= M_PKTHDR;
                m->m_data = m->m_pktdat;
+                m->m_pkthdr.rcvif = NULL;
+                m->m_pkthdr.len = 0;
+                m->m_pkthdr.header = NULL;
+                m->m_pkthdr.csum_flags = 0;
+                m->m_pkthdr.csum_data = 0;
+                m->m_pkthdr.aux = (struct mbuf *)NULL;
+                m->m_pkthdr.reserved1 = NULL;
+                m->m_pkthdr.reserved2 = NULL;
        }
        return (m);
 }
@@ -456,13 +471,18 @@ m_getpacket(void)
                 mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next;
 
                 m->m_next = m->m_nextpkt = 0;
-               m->m_ext.ext_free = 0;
                 m->m_type = MT_DATA;
                 m->m_data = m->m_ext.ext_buf;
                 m->m_flags = M_PKTHDR | M_EXT;
-                m->m_pkthdr.aux = (struct mbuf *)NULL;
+               m->m_pkthdr.len = 0;
+               m->m_pkthdr.rcvif = NULL;
+                m->m_pkthdr.header = NULL;
                 m->m_pkthdr.csum_data  = 0;
                 m->m_pkthdr.csum_flags = 0;
+                m->m_pkthdr.aux = (struct mbuf *)NULL;
+                m->m_pkthdr.reserved1 = 0;  
+                m->m_pkthdr.reserved2 = 0;  
+               m->m_ext.ext_free = 0;
                 m->m_ext.ext_size = MCLBYTES;
                 m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward =
                      &m->m_ext.ext_refs;
@@ -485,6 +505,142 @@ m_getpacket(void)
        return (m);
 }
 
+
+struct mbuf *
+m_getpackets(int num_needed, int num_with_pkthdrs, int how)
+{
+       struct mbuf *m;
+       struct mbuf **np, *top;
+
+       top = NULL;
+       np = &top;
+
+       m_clalloc(num_needed, how);     /* takes the MBUF_LOCK, but doesn't release it... */
+
+       while (num_needed--) {
+           if (mfree && mclfree) {     /* mbuf + cluster are available */
+               m = mfree;
+                MCHECK(m);
+                mfree = m->m_next;
+                ++mclrefcnt[mtocl(m)];
+                mbstat.m_mtypes[MT_FREE]--;
+                mbstat.m_mtypes[MT_DATA]++;
+                m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */
+                ++mclrefcnt[mtocl(m->m_ext.ext_buf)];
+                mbstat.m_clfree--;
+                mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next;
+
+                m->m_next = m->m_nextpkt = 0;
+                m->m_type = MT_DATA;
+                m->m_data = m->m_ext.ext_buf;
+               m->m_ext.ext_free = 0;
+                m->m_ext.ext_size = MCLBYTES;
+                m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs;
+
+               if (num_with_pkthdrs == 0)
+                   m->m_flags = M_EXT;
+               else {
+                   m->m_flags = M_PKTHDR | M_EXT;
+                   m->m_pkthdr.len = 0;
+                   m->m_pkthdr.rcvif = NULL;
+                   m->m_pkthdr.header = NULL;
+                   m->m_pkthdr.csum_flags = 0;
+                   m->m_pkthdr.csum_data = 0;
+                   m->m_pkthdr.aux = (struct mbuf *)NULL;
+                   m->m_pkthdr.reserved1 = NULL;
+                   m->m_pkthdr.reserved2 = NULL;
+
+                   num_with_pkthdrs--;
+               }
+
+           } else {
+
+               MBUF_UNLOCK();
+
+               if (num_with_pkthdrs == 0) {
+                   MGET(m, how, MT_DATA );
+               } else {
+                   MGETHDR(m, how, MT_DATA);
+                   
+                   if (m)
+                           m->m_pkthdr.len = 0;
+                   num_with_pkthdrs--;
+               }
+                if (m == 0)
+                   return(top);
+
+                MCLGET(m, how);
+                if ((m->m_flags & M_EXT) == 0) {
+                   m_free(m);
+                   return(top);
+                }
+               MBUF_LOCK();
+           }
+           *np = m; 
+
+           if (num_with_pkthdrs)
+               np = &m->m_nextpkt;
+           else
+               np = &m->m_next;
+       }
+       MBUF_UNLOCK();
+
+       return (top);
+}
+
+
+struct mbuf *
+m_getpackethdrs(int num_needed, int how)
+{
+       struct mbuf *m;
+       struct mbuf **np, *top;
+
+       top = NULL;
+       np = &top;
+
+       MBUF_LOCK();
+
+       while (num_needed--) {
+           if (m = mfree) {    /* mbufs are available */
+                MCHECK(m);
+                mfree = m->m_next;
+                ++mclrefcnt[mtocl(m)];
+                mbstat.m_mtypes[MT_FREE]--;
+                mbstat.m_mtypes[MT_DATA]++;
+
+                m->m_next = m->m_nextpkt = 0;
+                m->m_type = MT_DATA;
+               m->m_flags = M_PKTHDR;
+                m->m_data = m->m_pktdat;
+               m->m_pkthdr.len = 0;
+               m->m_pkthdr.rcvif = NULL;
+               m->m_pkthdr.header = NULL;
+               m->m_pkthdr.csum_flags = 0;
+               m->m_pkthdr.csum_data = 0;
+               m->m_pkthdr.aux = (struct mbuf *)NULL;
+               m->m_pkthdr.reserved1 = NULL;
+               m->m_pkthdr.reserved2 = NULL;
+
+           } else {
+
+               MBUF_UNLOCK();
+
+               m = m_retryhdr(how, MT_DATA);
+
+                if (m == 0)
+                   return(top);
+
+               MBUF_LOCK();
+           }
+           *np = m; 
+           np = &m->m_nextpkt;
+       }
+       MBUF_UNLOCK();
+
+       return (top);
+}
+
+
 /* free an mbuf list (m_nextpkt) while following m_next under one lock.
  * returns the count of mbuf packets freed. Used by the drivers.
  */
@@ -493,22 +649,25 @@ m_freem_list(m)
        struct mbuf *m;
 {
        struct mbuf *nextpkt;
-       int i, s, count=0;
+       int i, count=0;
 
-//     s = splimp();
        MBUF_LOCK();
+
        while (m) {
                if (m) 
-                   nextpkt = m->m_nextpkt; /* chain of linked mbufs from driver */
+                       nextpkt = m->m_nextpkt; /* chain of linked mbufs from driver */
                else 
-                   nextpkt = 0;
+                       nextpkt = 0;
                count++;
+
                while (m) { /* free the mbuf chain (like mfreem) */
                        struct mbuf *n = m->m_next;
+
                        if (n && n->m_nextpkt)
                                panic("m_freem_list: m_nextpkt of m_next != NULL");
                        if (m->m_type == MT_FREE)
                                panic("freeing free mbuf");
+
                        if (m->m_flags & M_EXT) {
                                if (MCLHASREFERENCE(m)) {
                                        remque((queue_t)&m->m_ext.ext_refs);
@@ -526,8 +685,8 @@ m_freem_list(m)
                        }
                        mbstat.m_mtypes[m->m_type]--;
                        (void) MCLUNREF(m);
+                       mbstat.m_mtypes[MT_FREE]++;
                        m->m_type = MT_FREE;
-                       mbstat.m_mtypes[m->m_type]++;
                        m->m_flags = 0;
                        m->m_len = 0;
                        m->m_next = mfree;
@@ -536,10 +695,14 @@ m_freem_list(m)
                }
                m = nextpkt; /* bump m with saved nextpkt if any */
        }
-       i = m_want;
-       m_want = 0;
+       if (i = m_want)
+               m_want = 0;
+
        MBUF_UNLOCK();
-       if (i) wakeup((caddr_t)&mfree);
+
+       if (i)
+               wakeup((caddr_t)&mfree);
+
        return (count);
 }
 
@@ -638,24 +801,41 @@ m_copym(m, off0, len, wait)
                panic("m_copym");
        if (off == 0 && m->m_flags & M_PKTHDR)
                copyhdr = 1;
-       while (off > 0) {
+
+       while (off >= m->m_len) {
                if (m == 0)
                        panic("m_copym");
-               if (off < m->m_len)
-                       break;
                off -= m->m_len;
                m = m->m_next;
        }
        np = &top;
        top = 0;
+
+       MBUF_LOCK();
+
        while (len > 0) {
                if (m == 0) {
                        if (len != M_COPYALL)
                                panic("m_copym");
                        break;
                }
-               MGET(n, wait, m->m_type);
+               if (n = mfree) {
+                       MCHECK(n);
+                       ++mclrefcnt[mtocl(n)];
+                       mbstat.m_mtypes[MT_FREE]--;
+                       mbstat.m_mtypes[m->m_type]++;
+                       mfree = n->m_next;
+                       n->m_next = n->m_nextpkt = 0;
+                       n->m_type = m->m_type;
+                       n->m_data = n->m_dat;
+                       n->m_flags = 0;
+               } else {
+                       MBUF_UNLOCK();
+                       n = m_retry(wait, m->m_type);
+                       MBUF_LOCK();
+               }
                *np = n;
+
                if (n == 0)
                        goto nospace;
                if (copyhdr) {
@@ -679,30 +859,158 @@ m_copym(m, off0, len, wait)
                    n->m_len = MHLEN;
                }
                if (m->m_flags & M_EXT) {
-                       MBUF_LOCK();
                        n->m_ext = m->m_ext;
                        insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs);
-                       MBUF_UNLOCK();
                        n->m_data = m->m_data + off;
                        n->m_flags |= M_EXT;
-               } else
+               } else {
                        bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
                            (unsigned)n->m_len);
+               }
                if (len != M_COPYALL)
                        len -= n->m_len;
                off = 0;
                m = m->m_next;
                np = &n->m_next;
        }
+       MBUF_UNLOCK();
+
        if (top == 0)
                MCFail++;
+
        return (top);
 nospace:
+       MBUF_UNLOCK();
+
        m_freem(top);
        MCFail++;
        return (0);
 }
 
+
+
+struct mbuf *
+m_copym_with_hdrs(m, off0, len, wait, m_last, m_off)
+       register struct mbuf *m;
+       int off0, wait;
+       register int len;
+       struct mbuf **m_last;
+       int          *m_off;
+{
+       register struct mbuf *n, **np;
+       register int off = off0;
+       struct mbuf *top = 0;
+       int copyhdr = 0;
+       int type;
+
+       if (off == 0 && m->m_flags & M_PKTHDR)
+               copyhdr = 1;
+
+       if (*m_last) {
+               m   = *m_last;
+               off = *m_off;
+       } else {
+               while (off >= m->m_len) {
+                       off -= m->m_len;
+                       m = m->m_next;
+               }
+       }
+       MBUF_LOCK();
+
+       while (len > 0) {
+               if (top == 0)
+                       type = MT_HEADER;
+               else {
+                       if (m == 0)
+                               panic("m_gethdr_and_copym");
+                       type = m->m_type;
+               }
+               if (n = mfree) {
+                       MCHECK(n);
+                       ++mclrefcnt[mtocl(n)];
+                       mbstat.m_mtypes[MT_FREE]--;
+                       mbstat.m_mtypes[type]++;
+                       mfree = n->m_next;
+                       n->m_next = n->m_nextpkt = 0;
+                       n->m_type = type;
+
+                       if (top) {
+                               n->m_data = n->m_dat;
+                               n->m_flags = 0;
+                       } else {
+                               n->m_data = n->m_pktdat;
+                               n->m_flags = M_PKTHDR;
+                               n->m_pkthdr.len = 0;
+                               n->m_pkthdr.rcvif = NULL;
+                               n->m_pkthdr.header = NULL;
+                               n->m_pkthdr.csum_flags = 0;
+                               n->m_pkthdr.csum_data = 0;
+                               n->m_pkthdr.aux = (struct mbuf *)NULL;
+                               n->m_pkthdr.reserved1 = NULL;
+                               n->m_pkthdr.reserved2 = NULL;
+                       }
+               } else {
+                       MBUF_UNLOCK();
+                       if (top)
+                               n = m_retry(wait, type);
+                       else
+                               n = m_retryhdr(wait, type);
+                       MBUF_LOCK();
+               }
+               if (n == 0)
+                       goto nospace;
+               if (top == 0) {
+                       top = n;
+                       np = &top->m_next;
+                       continue;
+               } else
+                       *np = n;
+
+               if (copyhdr) {
+                       M_COPY_PKTHDR(n, m);
+                       n->m_pkthdr.len = len;
+                       copyhdr = 0;
+               }
+               n->m_len = min(len, (m->m_len - off));
+
+               if (m->m_flags & M_EXT) {
+                       n->m_ext = m->m_ext;
+                       insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs);
+                       n->m_data = m->m_data + off;
+                       n->m_flags |= M_EXT;
+               } else {
+                       bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+                           (unsigned)n->m_len);
+               }
+               len -= n->m_len;
+               
+               if (len == 0) {
+                       if ((off + n->m_len) == m->m_len) {
+                              *m_last = m->m_next;
+                              *m_off  = 0;
+                       } else {
+                              *m_last = m;
+                              *m_off  = off + n->m_len;
+                       }
+                       break;
+               }
+               off = 0;
+               m = m->m_next;
+               np = &n->m_next;
+       }
+       MBUF_UNLOCK();
+
+       return (top);
+nospace:
+       MBUF_UNLOCK();
+
+       if (top)
+               m_freem(top);
+       MCFail++;
+       return (0);
+}
+
+
 /*
  * Copy data from an mbuf chain starting "off" bytes from the beginning,
  * continuing for "len" bytes, into the indicated buffer.
@@ -1172,7 +1480,11 @@ m_dup(register struct mbuf *m, int how)
                                 n->m_pkthdr.len = m->m_pkthdr.len;
                                 n->m_pkthdr.rcvif = m->m_pkthdr.rcvif;
                                 n->m_pkthdr.header = NULL;
+                                n->m_pkthdr.csum_flags = 0;
+                                n->m_pkthdr.csum_data = 0;
                                 n->m_pkthdr.aux = NULL;
+                                n->m_pkthdr.reserved1 = 0;
+                                n->m_pkthdr.reserved2 = 0;
                                 bcopy(m->m_data, n->m_data, m->m_pkthdr.len);
                                return(n);
                        }
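
For orientation, here is a hedged sketch of how the batch allocator added above might be consumed; sosend() in uipc_socket.c (next file) follows essentially this shape. m_getpackets(), m_freem_list(), MCLBYTES, MINCLSIZE and M_WAIT come from this diff and sys/mbuf.h; the chain-building function itself and its byte accounting are illustrative only, not part of the commit.

#include <sys/param.h>
#include <sys/mbuf.h>

/*
 * Illustrative consumer of m_getpackets(): pre-allocate enough cluster
 * mbufs for 'bytes' of payload, peel them off one at a time into an
 * outgoing chain, and release any unused ones with a single batched
 * m_freem_list() call.
 */
static struct mbuf *
example_cluster_chain(int bytes)
{
        struct mbuf *freelist, *m, *top = NULL, **np = &top;
        int num_needed, len, total = 0;

        num_needed = bytes / MCLBYTES;
        if (bytes - (num_needed * MCLBYTES) >= MINCLSIZE)
                num_needed++;
        if (num_needed < 1)
                num_needed = 1;

        /* first mbuf carries a packet header, all of them carry clusters */
        if ((freelist = m_getpackets(num_needed, 1, M_WAIT)) == NULL)
                return (NULL);

        while (bytes > 0 && (m = freelist) != NULL) {
                freelist = m->m_next;
                m->m_next = NULL;

                len = (bytes < MCLBYTES) ? bytes : MCLBYTES;
                m->m_len = len;             /* a real caller copies payload here */
                bytes -= len;
                total += len;

                *np = m;
                np = &m->m_next;
        }
        if (top != NULL)
                top->m_pkthdr.len = total;
        if (freelist != NULL)
                m_freem_list(freelist);     /* batched free of the leftovers */
        return (top);
}
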
index f8fbf313db763867d15f8d2beab64d5dd1416eb6..c5e6b4891b6f7bba4704a1991f97661d66053986 100644 (file)
@@ -117,6 +117,7 @@ SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
           0, "");
 
 /* Should we get a maximum also ??? */
+static int sosendmaxchain = 65536;
 static int sosendminchain = 16384;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
            0, "");
@@ -818,7 +819,7 @@ sosend(so, addr, uio, top, control, flags)
 
 {
        struct mbuf **mp;
-       register struct mbuf *m;
+       register struct mbuf *m, *freelist = NULL;
        register long space, len, resid;
        int clen = 0, error, s, dontroute, mlen, sendflags;
        int atomic = sosendallatonce(so) || top;
@@ -911,6 +912,7 @@ restart:
                splx(s);
                mp = &top;
                space -= clen;
+
                do {
                    if (uio == NULL) {
                        /*
@@ -920,41 +922,69 @@ restart:
                        if (flags & MSG_EOR)
                                top->m_flags |= M_EOR;
                    } else {
-                       boolean_t       funnel_state = TRUE;
-                       int             chainmbufs = (sosendminchain > 0 && resid >= sosendminchain);
-            
-                       if (chainmbufs)
-                           funnel_state = thread_funnel_set(network_flock, FALSE);
+                       boolean_t       dropped_funnel = FALSE;
+                       int             chainlength;
+                       int             bytes_to_copy;
+
+                       bytes_to_copy = min(resid, space);
+
+                       if (sosendminchain > 0) {
+                           if (bytes_to_copy >= sosendminchain) {
+                               dropped_funnel = TRUE;
+                               (void)thread_funnel_set(network_flock, FALSE);
+                           }
+                           chainlength = 0;
+                       } else
+                           chainlength = sosendmaxchain;
+
                        do {
-                       KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0);
-                       if (top == 0) {
+
+                       if (bytes_to_copy >= MINCLSIZE) {
+                         if ((m = freelist) == NULL) {
+                               int num_needed;
+                               int hdrs_needed = 0;
+                               
+                               if (top == 0)
+                                   hdrs_needed = 1;
+                               num_needed = bytes_to_copy / MCLBYTES;
+
+                               if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
+                                   num_needed++;
+
+                               if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
+                                   goto getpackets_failed;
+                               m = freelist;
+                           }
+                           freelist = m->m_next;
+                           m->m_next = NULL;
+
+                           mlen = MCLBYTES;
+                           len = min(mlen, bytes_to_copy);
+                       } else {
+getpackets_failed:
+                           if (top == 0) {
                                MGETHDR(m, M_WAIT, MT_DATA);
                                mlen = MHLEN;
                                m->m_pkthdr.len = 0;
                                m->m_pkthdr.rcvif = (struct ifnet *)0;
-                       } else {
+                           } else {
                                MGET(m, M_WAIT, MT_DATA);
                                mlen = MLEN;
+                           }
+                           len = min(mlen, bytes_to_copy);
+                           /*
+                            * For datagram protocols, leave room
+                            * for protocol headers in first mbuf.
+                            */
+                           if (atomic && top == 0 && len < mlen)
+                               MH_ALIGN(m, len);
                        }
-                       if (resid >= MINCLSIZE) {
-                               MCLGET(m, M_WAIT);
-                               if ((m->m_flags & M_EXT) == 0)
-                                       goto nopages;
-                               mlen = MCLBYTES;
-                               len = min(min(mlen, resid), space);
-                       } else {
-nopages:
-                               len = min(min(mlen, resid), space);
-                               /*
-                                * For datagram protocols, leave room
-                                * for protocol headers in first mbuf.
-                                */
-                               if (atomic && top == 0 && len < mlen)
-                                       MH_ALIGN(m, len);
-                       }
-                       KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0);
+                       chainlength += len;
+                       
                        space -= len;
+
                        error = uiomove(mtod(m, caddr_t), (int)len, uio);
+
                        resid = uio->uio_resid;
                        
                        m->m_len = len;
@@ -968,9 +998,12 @@ nopages:
                                        top->m_flags |= M_EOR;
                                break;
                        }
-                   } while (space > 0 && (chainmbufs || atomic || resid < MINCLSIZE));
-                   if (chainmbufs)
-                       funnel_state = thread_funnel_set(network_flock, TRUE);
+                       bytes_to_copy = min(resid, space);
+
+                   } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
+
+                   if (dropped_funnel == TRUE)
+                       (void)thread_funnel_set(network_flock, TRUE);
                    if (error)
                        goto release;
                    }
@@ -1024,6 +1057,9 @@ nopages:
                                {       splx(s);
                                        if (error == EJUSTRETURN)
                                        {       sbunlock(&so->so_snd);
+                                       
+                                               if (freelist)
+                                                       m_freem_list(freelist);     
                                                return(0);
                                        }
                                        goto release;
@@ -1056,6 +1092,8 @@ out:
                m_freem(top);
        if (control)
                m_freem(control);
+       if (freelist)
+               m_freem_list(freelist);     
 
        KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
                     so,
@@ -1093,6 +1131,7 @@ soreceive(so, psa, uio, mp0, controlp, flagsp)
        int *flagsp;
 {
        register struct mbuf *m, **mp;
+       register struct mbuf *free_list, *ml;
        register int flags, len, error, s, offset;
        struct protosw *pr = so->so_proto;
        struct mbuf *nextrecord;
@@ -1295,6 +1334,10 @@ dontblock:
        }
        moff = 0;
        offset = 0;
+
+       free_list = m;
+       ml = (struct mbuf *)0;
+
        while (m && uio->uio_resid > 0 && error == 0) {
                if (m->m_type == MT_OOBDATA) {
                        if (type != MT_OOBDATA)
@@ -1357,8 +1400,9 @@ dontblock:
                                        so->so_rcv.sb_mb = m = m->m_next;
                                        *mp = (struct mbuf *)0;
                                } else {
-                                       MFREE(m, so->so_rcv.sb_mb);
-                                       m = so->so_rcv.sb_mb;
+                                       m->m_nextpkt = 0;
+                                       ml = m;
+                                       m = m->m_next;
                                }
                                if (m)
                                        m->m_nextpkt = nextrecord;
@@ -1401,6 +1445,12 @@ dontblock:
                    !sosendallatonce(so) && !nextrecord) {
                        if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                break;
+
+                       if (ml) {
+                               so->so_rcv.sb_mb = ml->m_next;
+                               ml->m_next = (struct mbuf *)0;
+                               m_freem_list(free_list);
+                       }
                        error = sbwait(&so->so_rcv);
                        if (error) {
                                sbunlock(&so->so_rcv);
@@ -1409,10 +1459,18 @@ dontblock:
                                return (0);
                        }
                        m = so->so_rcv.sb_mb;
-                       if (m)
+                       if (m) {
                                nextrecord = m->m_nextpkt;
+                               free_list = m;
+                       }
+                       ml = (struct mbuf *)0;
                }
        }
+       if (ml) {
+               so->so_rcv.sb_mb = ml->m_next;
+               ml->m_next = (struct mbuf *)0;
+               m_freem_list(free_list);
+       }
 
        if (m && pr->pr_flags & PR_ATOMIC) {
                if (so->so_options & SO_DONTTRUNC)
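
The soreceive() change above and the sbdrop() change in the next file share one idea: instead of MFREE'ing each consumed mbuf individually, the code remembers the start of the consumed run (free_list) and the last fully consumed mbuf (ml), detaches the run from the socket buffer, and frees it with one m_freem_list() call. Below is a simplified, hypothetical sketch of that pattern on a bare mbuf chain; the real code also has to respect packet boundaries via m_nextpkt, which is omitted here.

#include <sys/param.h>
#include <sys/mbuf.h>

/*
 * Drop 'len' bytes of fully consumed mbufs from the front of *headp,
 * freeing the consumed run in one batched call.  'headp' stands in for
 * so->so_rcv.sb_mb in the diff above.
 */
static void
example_drop_bytes(struct mbuf **headp, int len)
{
        struct mbuf *m = *headp;
        struct mbuf *free_list = m, *ml = NULL;

        while (m != NULL && len >= m->m_len) {
                len -= m->m_len;
                ml = m;                     /* last mbuf fully consumed */
                m = m->m_next;
        }
        if (ml != NULL) {
                *headp = ml->m_next;        /* detach the consumed run */
                ml->m_next = NULL;
                m_freem_list(free_list);    /* single batched free */
        }
        /*
         * A partially consumed mbuf (len still > 0 here) is trimmed in
         * place rather than freed; see the m_len/m_data adjustment that
         * sbdrop() keeps in the next file.
         */
}
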
index 4026fe6773f4c75e2a16b27bb7d7127c25bb1bb2..33d2ede3b7c4ff949b1b14a646d72b0ace16a56f 100644 (file)
 #include <sys/sysctl.h>
 #include <sys/ev.h>
 
+#include <sys/kdebug.h>
+
+#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
+#define DBG_FNC_SBAPPEND       NETDBG_CODE(DBG_NETSOCK, 5)
+
+
 /*
  * Primitive routines for operating on sockets and socket buffers
  */
@@ -281,6 +287,7 @@ sonewconn(head, connstatus)
        so->so_pgid  = head->so_pgid;
        so->so_uid = head->so_uid;
        so->so_rcv.sb_flags |= SB_RECV; /* XXX */
+
        (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
 
        if (so->so_proto->pr_sfilter.tqh_first)
@@ -572,6 +579,9 @@ sbappend(sb, m)
 {      register struct kextcb *kp;
        register struct mbuf *n;
 
+
+       KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
+
        if (m == 0)
                return;
        kp = sotokextcb(sbtoso(sb));
@@ -594,6 +604,8 @@ sbappend(sb, m)
                } while (n->m_next && (n = n->m_next));
        }
        sbcompress(sb, m, n);
+
+       KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
 }
 
 #ifdef SOCKBUF_DEBUG
@@ -920,10 +932,12 @@ sbdrop(sb, len)
        register struct sockbuf *sb;
        register int len;
 {
-       register struct mbuf *m, *mn;
-       struct mbuf *next;
+       register struct mbuf *m, *free_list, *ml;
+       struct mbuf *next, *last;
        register struct kextcb *kp;
 
+       KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
+
        kp = sotokextcb(sbtoso(sb));
        while (kp)
        {       if (kp->e_sout && kp->e_sout->su_sbdrop)
@@ -932,13 +946,15 @@ sbdrop(sb, len)
                }
                kp = kp->e_next;
        }
-
        next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+       free_list = last = m;
+       ml = (struct mbuf *)0;
+
        while (len > 0) {
                if (m == 0) {
                        if (next == 0)
                                panic("sbdrop");
-                       m = next;
+                       m = last = next;
                        next = m->m_nextpkt;
                        continue;
                }
@@ -950,20 +966,30 @@ sbdrop(sb, len)
                }
                len -= m->m_len;
                sbfree(sb, m);
-               MFREE(m, mn);
-               m = mn;
+
+               ml = m;
+               m = m->m_next;
        }
        while (m && m->m_len == 0) {
                sbfree(sb, m);
-               MFREE(m, mn);
-               m = mn;
+
+               ml = m;
+               m = m->m_next;
+       }
+       if (ml) {
+               ml->m_next = (struct mbuf *)0;
+               last->m_nextpkt = (struct mbuf *)0;
+               m_freem_list(free_list);
        }
        if (m) {
                sb->sb_mb = m;
                m->m_nextpkt = next;
        } else
                sb->sb_mb = next;
+
        postevent(0, sb, EV_RWBYTES);
+
+       KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
 }
 
 /*
index 35f27aa4fe4f47c49c9a31d58ecb4f75dbeb8f2a..459a67a057ca89ef8c31e564aae71e5ec8420e8a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -524,11 +524,12 @@ unp_detach(unp)
        unp->unp_gencnt = ++unp_gencnt;
        --unp_count;
        if (unp->unp_vnode) {
+               struct vnode *tvp = unp->unp_vnode;
                unp->unp_vnode->v_socket = 0;
+               unp->unp_vnode = 0;
                thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-               vrele(unp->unp_vnode);
+               vrele(tvp);
                thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
-               unp->unp_vnode = 0;
        }
        if (unp->unp_conn)
                unp_disconnect(unp);
index 86dba15885b5c776c0fb31aeb8bd5e12260d6c37..81b836085dc1b5cb8bdc9531a07acb3507543514 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -663,8 +663,8 @@ spec_close(ap)
                 */
                if (vcount(vp) == 2 && ap->a_p &&
                    vp == ap->a_p->p_session->s_ttyvp) {
-                       vrele(vp);
                        ap->a_p->p_session->s_ttyvp = NULL;
+                       vrele(vp);
                }
                /*
                 * If the vnode is locked, then we are in the midst
index 35c00e8dcb1c144ff95190211c2a9618160f92eb..121b99dcb8d593145e932dc05405c7c598f08d3a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -34,7 +34,6 @@
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <mach/machine/vm_types.h>
-//#include <mach/mach_init.h>
 #include <sys/vnode.h>
 #include <sys/socket.h>
 #include <sys/mount.h>
index 8dd2ec37434efa34f6660506bada4eda145a01a1..c3ea0ae2f28045e95e41bfe4b6028d9b1f197597 100644 (file)
@@ -339,7 +339,12 @@ int adsp_wput(gref, mp)
        int s;
        gbuf_t *xm;
        ioc_t *iocbp;
-       CCBPtr sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
+       CCBPtr sp;
+       
+       if (gref->info)
+               sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
+       else
+               sp = 0;
 
        if (gbuf_type(mp) == MSG_IOCTL) {
                iocbp = (ioc_t *)gbuf_rptr(mp);
index ff9b2da853fbddad59a28bd070a4f1fdad6c8df1..fa63025aa9d0e137ec17009f3428fb514ae40feb 100644 (file)
@@ -351,8 +351,10 @@ ip_output(m0, opt, ro, flags, imo)
                 */
                if (imo != NULL) {
                        ip->ip_ttl = imo->imo_multicast_ttl;
-                       if (imo->imo_multicast_ifp != NULL)
+                       if (imo->imo_multicast_ifp != NULL) {
                                ifp = imo->imo_multicast_ifp;
+                               dl_tag = ifp->if_data.default_proto;
+                       }
                        if (imo->imo_multicast_vif != -1)
                                ip->ip_src.s_addr =
                                    ip_mcast_src(imo->imo_multicast_vif);
index 599c6e0d958ffed549a549877b37260b5062889a..92f3697c21ca1845db649212b1da6460c8fcdf71 100644 (file)
@@ -721,6 +721,12 @@ findpcb:
                goto dropwithreset;
        if (tp->t_state == TCPS_CLOSED)
                goto drop;
+        /* 
+         * Bogus state when listening port owned by SharedIP with loopback as the 
+         * only configured interface: BlueBox does not filters loopback
+         */ 
+       if (tp->t_state == TCP_NSTATES)
+               goto drop;
 
        /* Unscale the window into a 32-bit value. */
        if ((thflags & TH_SYN) == 0)
index c648029c645b6c1f936d7070faabb8a69f114ec3..56043b0597ff688a4cd57d48dfdd3fc2d098e54e 100644 (file)
@@ -93,6 +93,7 @@
 #endif
 #include <sys/kdebug.h>
 
+
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETTCP, 1)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETTCP, 3)
 #define DBG_FNC_TCP_OUTPUT     NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1)
@@ -128,14 +129,19 @@ tcp_output(tp)
 #if INET6
        int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0;
 #endif
+       int    last_off;
+       int    m_off;
+       struct mbuf *m_last = 0;
+       struct mbuf *m_head = 0;
+
 
        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
+
        KERNEL_DEBUG(DBG_LAYER_BEG,
                     ((tp->t_template->tt_dport << 16) | tp->t_template->tt_sport),
                     (((tp->t_template->tt_src.s_addr & 0xffff) << 16) |
                      (tp->t_template->tt_dst.s_addr & 0xffff)),
                     0,0,0);
-
        /*
         * Determine length of data that should be transmitted,
         * and flags that will be used.
@@ -563,33 +569,57 @@ send:
                m->m_len += hdrlen;
                m->m_data -= hdrlen;
 #else
-               MGETHDR(m, M_DONTWAIT, MT_HEADER);
-               if (m == NULL) {
-                       error = ENOBUFS;
-                       goto out;
-               }
+               m = NULL;
 #if INET6
                if (MHLEN < hdrlen + max_linkhdr) {
+                       MGETHDR(m, M_DONTWAIT, MT_HEADER);
+                       if (m == NULL) {
+                               error = ENOBUFS;
+                               goto out;
+                       }
                        MCLGET(m, M_DONTWAIT);
                        if ((m->m_flags & M_EXT) == 0) {
                                m_freem(m);
                                error = ENOBUFS;
                                goto out;
                        }
+                       m->m_data += max_linkhdr;
+                       m->m_len = hdrlen;
                }
 #endif
-               m->m_data += max_linkhdr;
-               m->m_len = hdrlen;
                if (len <= MHLEN - hdrlen - max_linkhdr) {
+                       if (m == NULL) {
+                               MGETHDR(m, M_DONTWAIT, MT_HEADER);
+                               if (m == NULL) {
+                                       error = ENOBUFS;
+                                       goto out;
+                               }
+                               m->m_data += max_linkhdr;
+                               m->m_len = hdrlen;
+                       }
                        m_copydata(so->so_snd.sb_mb, off, (int) len,
                            mtod(m, caddr_t) + hdrlen);
                        m->m_len += len;
                } else {
-                       m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
-                       if (m->m_next == 0) {
-                               (void) m_free(m);
-                               error = ENOBUFS;
-                               goto out;
+                       if (m != NULL) {
+                               m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+                               if (m->m_next == 0) {
+                                       (void) m_free(m);
+                                       error = ENOBUFS;
+                                       goto out;
+                               }
+                       } else {
+                               if (m_head != so->so_snd.sb_mb || last_off != off)
+                                       m_last = NULL;
+                               last_off = off + len;
+                               m_head = so->so_snd.sb_mb;
+
+                               if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, (int) len, M_DONTWAIT, &m_last, &m_off)) == NULL) {
+                                       error = ENOBUFS;
+                                       goto out;
+                               }
+                               m->m_data += max_linkhdr;
+                               m->m_len = hdrlen;
                        }
                }
 #endif
@@ -701,6 +731,7 @@ send:
                 */
                tp->snd_up = tp->snd_una;               /* drag it along */
 
+
        /*
         * Put TCP length in extended header, and then
         * checksum extended header and data.
@@ -857,8 +888,6 @@ send:
        KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport),
                   (((thtoti(th)->ti_src.s_addr & 0xffff) << 16) | (thtoti(th)->ti_dst.s_addr & 0xffff)),
                    th->th_seq, th->th_ack, th->th_win);
-
-
 #if 1
        /*
         * See if we should do MTU discovery.  We do it only if the following
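
The tcp_output() rework above leans on the (m_last, m_off) out-parameters of the new m_copym_with_hdrs(): as long as consecutive sends continue from the same socket-buffer chain at the offset where the previous copy stopped, the walk can resume from the cached mbuf instead of re-traversing the chain from sb_mb. A hedged sketch of that calling pattern follows, with the cached state kept in a hypothetical cursor structure rather than in tcp_output()'s locals; the signature used is the one declared in this diff.

#include <sys/param.h>
#include <sys/mbuf.h>

/* hypothetical per-connection cache of where the last copy stopped */
struct copy_cursor {
        struct mbuf *cc_head;           /* chain the cache is valid for */
        struct mbuf *cc_last;           /* mbuf to resume the walk from */
        int          cc_off;            /* offset within cc_last */
        int          cc_nextoff;        /* absolute offset the cache expects */
};

/*
 * Copy 'len' bytes starting at absolute offset 'off' from 'chain',
 * reusing the cursor when the request continues the previous copy.
 * This mirrors the m_head/last_off bookkeeping added to tcp_output().
 */
static struct mbuf *
example_copy_segment(struct copy_cursor *cc, struct mbuf *chain,
                     int off, int len)
{
        if (cc->cc_head != chain || cc->cc_nextoff != off)
                cc->cc_last = NULL;             /* cache is stale; start over */
        cc->cc_head = chain;
        cc->cc_nextoff = off + len;

        /* returns NULL on allocation failure */
        return (m_copym_with_hdrs(chain, off, len, M_DONTWAIT,
                                  &cc->cc_last, &cc->cc_off));
}
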
index 00cc5f4683e77c1264975f69e8b00917a8a3d89e..c98c92458015b2d5976c782d9b491815434720ef 100644 (file)
@@ -638,9 +638,9 @@ void        nfsm_srvpostopattr __P((struct nfsrv_descript *, int, struct vattr *,
 int    netaddr_match __P((int, union nethostaddr *, struct mbuf *));
 int    nfs_request __P((struct vnode *, struct mbuf *, int, struct proc *,
                         struct ucred *, struct mbuf **, struct mbuf **,
-                        caddr_t *));
+                        caddr_t *, u_int64_t *));
 int    nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *,
-                              struct vattr *));
+                              struct vattr *, int, u_int64_t *));
 int    nfs_namei __P((struct nameidata *, fhandle_t *, int,
                       struct nfssvc_sock *, struct mbuf *, struct mbuf **,
                       caddr_t *, struct vnode **, struct proc *, int, int));
@@ -747,6 +747,11 @@ int        nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 void   nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag));
 void   nfsrv_slpderef __P((struct nfssvc_sock *slp));
 
+/*
+ * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG)
+ * But some of this code may prove useful someday...
+ */
+#undef NFSDIAG
 #if NFSDIAG
 
 extern int nfstraceindx;
index 040678d37c28a4e394eedf97419de6e6c442e249..926de3419bb343ac2fdf92bfd078f9f7d32777d6 100644 (file)
@@ -58,7 +58,6 @@
  *     @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
  * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
  */
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 
 #include <sys/kdebug.h>
 
+#define FSDBG(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+
 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
                                        struct proc *p, int operation));
-static struct buf *nfs_getwriteblk __P((struct vnode *vp, daddr_t bn,
-                                       int size, struct proc *p,
-                                       struct ucred *cred, int off, int len));
 
 extern int nfs_numasync;
 extern struct nfsstats nfsstats;
@@ -129,7 +135,7 @@ nfs_bioread(vp, uio, ioflag, cred, getpages)
        p = uio->uio_procp;
        if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
                (void)nfs_fsinfo(nmp, vp, cred, p);
-        /*due to getblk/vm interractions, use vm page size or less values */
+       /*due to getblk/vm interractions, use vm page size or less values */
        biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
        /*
         * For nfs, cache consistency can only be maintained approximately.
@@ -231,7 +237,8 @@ nfs_bioread(vp, uio, ioflag, cred, getpages)
                 */
                if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
                    for (nra = 0; nra < nmp->nm_readahead &&
-                               (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
+                                 (off_t)(lbn + 1 + nra) * biosize < np->n_size;
+                        nra++) {
                                rabn = lbn + 1 + nra;
                                if (!incore(vp, rabn)) {
                                        rabp = nfs_getcacheblk(vp, rabn, biosize, p, operation);
@@ -335,36 +342,52 @@ again:
                    SET(bp->b_flags, B_READ);
                    error = nfs_doio(bp, cred, p);
                    if (error) {
-                               brelse(bp);
-                               while (error == NFSERR_BAD_COOKIE) {
-                                       nfs_invaldir(vp);
-                                       error = nfs_vinvalbuf(vp, 0, cred, p, 1);
-                                       /*
-                                        * Yuck! The directory has been modified on the
-                                        * server. The only way to get the block is by
-                                        * reading from the beginning to get all the
-                                        * offset cookies.
-                                        */
-                                       for (i = 0; i <= lbn && !error; i++) {
-                                       if (np->n_direofoffset
-                                               && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
-                                               return (0);
-                                       bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p, operation);
-                                       if (!bp)
-                                               return (EINTR);
-                                       if (!ISSET(bp->b_flags, B_DONE)) {
-                                               SET(bp->b_flags, B_READ);
-                                               error = nfs_doio(bp, cred, p);
-                                               if (error) {
-                                                       brelse(bp);
-                                               } else if (i < lbn)
-                                                       brelse(bp);
-                                       }
-                                       }
-                               }
-                               if (error)
-                                       return (error);
+                       brelse(bp);
                    }
+                   while (error == NFSERR_BAD_COOKIE) {
+                       nfs_invaldir(vp);
+                       error = nfs_vinvalbuf(vp, 0, cred, p, 1);
+                       /*
+                        * Yuck! The directory has been modified on the
+                        * server. The only way to get the block is by
+                        * reading from the beginning to get all the
+                        * offset cookies.
+                        */
+                       for (i = 0; i <= lbn && !error; i++) {
+                           if (np->n_direofoffset
+                               && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
+                                   return (0);
+                           bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p,
+                                                operation);
+                           if (!bp)
+                                   return (EINTR);
+                           if (!ISSET(bp->b_flags, B_CACHE)) {
+                                   SET(bp->b_flags, B_READ);
+                                   error = nfs_doio(bp, cred, p);
+                                   /*
+                                    * no error + B_INVAL == directory EOF,
+                                    * use the block.
+                                    */
+                                   if (error == 0 && (bp->b_flags & B_INVAL))
+                                           break;
+                           }
+                           /*
+                            * An error will throw away the block and the
+                            * for loop will break out.  If no error and this
+                            * is not the block we want, we throw away the
+                            * block and go for the next one via the for loop.
+                            */
+                           if (error || i < lbn)
+                                   brelse(bp);
+                       }
+                   }
+                   /*
+                    * The above while is repeated if we hit another cookie
+                    * error.  If we hit an error and it wasn't a cookie error,
+                    * we give up.
+                    */
+                   if (error)
+                       return (error);
                }
 
                /*
@@ -377,17 +400,18 @@ again:
                    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
                    !(np->n_flag & NQNFSNONCACHE) &&
                    !incore(vp, lbn + 1)) {
-                       rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p, operation);
+                       rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p,
+                                              operation);
                        if (rabp) {
                            if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
-                                       SET(rabp->b_flags, (B_READ | B_ASYNC));
-                                       if (nfs_asyncio(rabp, cred)) {
-                                               SET(rabp->b_flags, (B_INVAL|B_ERROR));
-                                               rabp->b_error = EIO;
-                                               brelse(rabp);
-                                       }
+                               SET(rabp->b_flags, (B_READ | B_ASYNC));
+                               if (nfs_asyncio(rabp, cred)) {
+                                   SET(rabp->b_flags, (B_INVAL|B_ERROR));
+                                   rabp->b_error = EIO;
+                                   brelse(rabp);
+                               }
                            } else {
-                                       brelse(rabp);
+                               brelse(rabp);
                            }
                        }
                }
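
The read-ahead issued above (and the per-block loop for regular files earlier in nfs_bioread) is gated on the async-daemon count, the mount's nm_readahead budget, the next block still starting inside the known file size, and the block not already being in core. A minimal userland sketch of that gating arithmetic, with block_cached() as a hypothetical stand-in for incore():

    #include <stdio.h>

    /* Hypothetical stand-in for incore(): pretend only block 7 is cached. */
    static int block_cached(long bn)
    {
        return bn == 7;
    }

    int main(void)
    {
        long long n_size    = 9 * 4096;  /* file size as the client knows it */
        int       biosize   = 4096;      /* buffer size used for NFS blocks */
        long      lbn       = 5;         /* block just handed to the caller */
        int       readahead = 4;         /* nm_readahead equivalent */
        int       nra;

        /*
         * Mirror the gating in nfs_bioread(): stop when the read-ahead
         * budget is used up or the candidate block would start at or past
         * EOF, and skip blocks that are already cached.
         */
        for (nra = 0; nra < readahead &&
             (long long)(lbn + 1 + nra) * biosize < n_size; nra++) {
            long rabn = lbn + 1 + nra;
            if (!block_cached(rabn))
                printf("would issue async read for block %ld\n", rabn);
        }
        return 0;
    }
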
@@ -396,6 +420,21 @@ again:
                 * the second term may be negative.
                 */
                n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
+               /*
+                * Unlike VREG files, whose buffer size (bp->b_bcount) is
+                * chopped for the EOF condition, we cannot tell how large
+                * NFS directories are going to be until we hit EOF.  So
+                * an NFS directory buffer is *not* chopped to its EOF.  Now,
+                * it just so happens that b_resid will effectively chop it
+                * to EOF.  *BUT* this information is lost if the buffer goes
+                * away and is reconstituted into a B_CACHE state (recovered
+                * from VM) later.  So we keep track of the directory eof
+                * in np->n_direofoffset and chop it off as an extra step
+                * right here.
+                */
+               if (np->n_direofoffset &&
+                   n > np->n_direofoffset - uio->uio_offset)
+                       n = np->n_direofoffset - uio->uio_offset;
                break;
            default:
                printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
@@ -423,6 +462,7 @@ again:
        return (error);
 }
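
The NFSERR_BAD_COOKIE recovery in the VDIR case above has to re-read the directory from block 0 so that every offset cookie up to the wanted block is regenerated; any block other than the target is released as soon as it has been read. A simplified, self-contained sketch of that retry shape (read_dir_block() and SIM_BAD_COOKIE are illustrative stand-ins, not kernel interfaces):

    #include <stdio.h>

    #define SIM_BAD_COOKIE  (-1)    /* stand-in for NFSERR_BAD_COOKIE */

    /* Hypothetical stand-in for reading one directory block from the
     * server; fails with a simulated bad-cookie error on the first call. */
    static int read_dir_block(int bn, int *failures_left)
    {
        if (*failures_left > 0) {
            (*failures_left)--;
            return SIM_BAD_COOKIE;
        }
        printf("read block %d\n", bn);
        return 0;
    }

    int main(void)
    {
        int lbn = 5;                /* directory block the caller wants */
        int failures = 1;
        int error = read_dir_block(lbn, &failures);

        /*
         * On a cookie error, invalidate the cached directory and re-read
         * every block from 0 up to lbn so the offset cookies are rebuilt;
         * in the kernel, blocks other than lbn are brelse'd immediately.
         * A non-cookie error would end the loop and be returned instead.
         */
        while (error == SIM_BAD_COOKIE) {
            int i;
            for (i = 0, error = 0; i <= lbn && !error; i++)
                error = read_dir_block(i, &failures);
        }
        return error ? 1 : 0;
    }
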
 
+
 /*
  * Vnode op for write using bio
  */
@@ -448,6 +488,9 @@ nfs_write(ap)
        daddr_t lbn;
        int bufsize;
        int n, on, error = 0, iomode, must_commit;
+       off_t boff;
+       struct iovec iov;
+       struct uio auio;
 
 #if DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
@@ -496,8 +539,8 @@ nfs_write(ap)
         * will be the same size within a filesystem. nfs_writerpc will
         * still use nm_wsize when sizing the rpc's.
         */
-        /*due to getblk/vm interractions, use vm page size or less values */
-        biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
+       /* due to getblk/vm interactions, use the VM page size or smaller */
+       biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
 
        do {
                /*
@@ -530,12 +573,6 @@ nfs_write(ap)
                on = uio->uio_offset & (biosize-1);
                n = min((unsigned)(biosize - on), uio->uio_resid);
 again:
-               if (uio->uio_offset + n > np->n_size) {
-                       np->n_size = uio->uio_offset + n;
-                       np->n_flag |= NMODIFIED;
-                       if (UBCISVALID(vp))
-                               ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
-               }
                bufsize = biosize;
 #if 0
 /* (removed for UBC) */
@@ -544,21 +581,175 @@ again:
                        bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
                }
 #endif
-               bp = nfs_getwriteblk(vp, lbn, bufsize, p, cred, on, n);
+               /*
+                * Get a cache block for writing.  The range to be written is
+                * (off..off+len) within the block.  We ensure that the block
+                * either has no dirty region or that the given range is
+                * contiguous with the existing dirty region.
+                */
+               bp = nfs_getcacheblk(vp, lbn, bufsize, p, BLK_WRITE);
                if (!bp)
                        return (EINTR);
+               /*
+                * Resize nfsnode *after* we busy the buffer to prevent
+                * readers from reading garbage.
+                * If there was a partial buf at the old eof, validate
+                * and zero the new bytes. 
+                */
+               if (uio->uio_offset + n > np->n_size) {
+                       struct buf *bp0 = NULL;
+                       daddr_t bn = np->n_size / biosize;
+                       int off = np->n_size & (biosize - 1);
+
+                       if (off && bn < lbn && incore(vp, bn))
+                               bp0 = nfs_getcacheblk(vp, bn, biosize, p,
+                                                     BLK_WRITE);
+                       np->n_flag |= NMODIFIED;
+                       np->n_size = uio->uio_offset + n;
+                       ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+                       if (bp0) {
+                               bzero((char *)bp0->b_data + off, biosize - off);
+                               bp0->b_validend = biosize;
+                               brelse(bp0);
+                       }
+               }
+               /*
+                * NFS has embedded ucred so crhold() risks zone corruption
+                */
+               if (bp->b_wcred == NOCRED)
+                       bp->b_wcred = crdup(cred);
+               /*
+                * If dirtyend exceeds file size, chop it down.  This should
+                * not occur unless there is a race.
+                */
+               if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend >
+                   np->n_size)
+                       bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno *
+                                                     DEV_BSIZE;
+               /*
+                * UBC doesn't (yet) handle partial pages so nfs_biowrite was
+                * hacked to never bdwrite, to start every little write right
+                * away.  Running IE, Avie noticed the performance problem, thus
+                * this code, which permits those delayed writes by ensuring an
+                * initial read of the entire page.  The read may hit eof
+                * ("short read"), but we will handle that.
+                *
+                * We are quite dependent on the correctness of B_CACHE, so check
+                * that first in case of problems.
+                */
+               if (!ISSET(bp->b_flags, B_CACHE) && n < PAGE_SIZE) {
+                       boff = (off_t)bp->b_blkno * DEV_BSIZE;
+                       auio.uio_iov = &iov;
+                       auio.uio_iovcnt = 1;
+                       auio.uio_offset = boff;
+                       auio.uio_resid = PAGE_SIZE;
+                       auio.uio_segflg = UIO_SYSSPACE;
+                       auio.uio_rw = UIO_READ;
+                       auio.uio_procp = p;
+                       iov.iov_base = bp->b_data;
+                       iov.iov_len = PAGE_SIZE;
+                       error = nfs_readrpc(vp, &auio, cred);
+                       if (error) {
+                               bp->b_error = error;
+                               SET(bp->b_flags, B_ERROR);
+                               printf("nfs_write: readrpc %d", error);
+                       }
+                       if (auio.uio_resid > 0)
+                               bzero(iov.iov_base, auio.uio_resid);
+                       bp->b_validoff = 0;
+                       bp->b_validend = PAGE_SIZE - auio.uio_resid;
+                       if (np->n_size > boff + bp->b_validend)
+                               bp->b_validend = min(np->n_size - boff,
+                                                    PAGE_SIZE);
+                       bp->b_dirtyoff = 0;
+                       bp->b_dirtyend = 0;
+               }
+       
+               /*
+                * If the new write will leave a contiguous dirty
+                * area, just update the b_dirtyoff and b_dirtyend,
+                * otherwise try to extend the dirty region.
+                */
+               if (bp->b_dirtyend > 0 &&
+                   (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+                       off_t start, end;
+       
+                       boff = (off_t)bp->b_blkno * DEV_BSIZE;
+                       if (on > bp->b_dirtyend) {
+                               start = boff + bp->b_validend;
+                               end = boff + on;
+                       } else {
+                               start = boff + on + n;
+                               end = boff + bp->b_validoff;
+                       }
+                       
+                       /*
+                        * It may be that the valid region in the buffer
+                        * covers the region we want, in which case just
+                        * extend the dirty region.  Otherwise we try to
+                        * extend the valid region.
+                        */
+                       if (end > start) {
+                               auio.uio_iov = &iov;
+                               auio.uio_iovcnt = 1;
+                               auio.uio_offset = start;
+                               auio.uio_resid = end - start;
+                               auio.uio_segflg = UIO_SYSSPACE;
+                               auio.uio_rw = UIO_READ;
+                               auio.uio_procp = p;
+                               iov.iov_base = bp->b_data + (start - boff);
+                               iov.iov_len = end - start;
+                               error = nfs_readrpc(vp, &auio, cred);
+                               /*
+                                * If we couldn't read, do not do a VOP_BWRITE
+                                * as originally coded.  That could also fail, and
+                                * looping back to "again" as it used to could have
+                                * us stuck trying to write the same buf again.
+                                * nfs_write will get the entire region if
+                                * nfs_readrpc succeeded; if it was unsuccessful
+                                * we should just error out.  Errors like ESTALE
+                                * would keep us looping, unlike transient errors
+                                * that might justify a retry.  We can return here
+                                * instead of altering dirty region later.  We
+                                * did not write old dirty region at this point.
+                                */
+                               if (error) {
+                                       bp->b_error = error;
+                                       SET(bp->b_flags, B_ERROR);
+                                       printf("nfs_write: readrpc2 %d", error);
+                                       brelse(bp);
+                                       return (error);
+                               }
+                               /*
+                                * The read worked.
+                                * If there was a short read, just zero fill.
+                                */
+                               if (auio.uio_resid > 0)
+                                       bzero(iov.iov_base, auio.uio_resid);
+                               if (on > bp->b_dirtyend)
+                                       bp->b_validend = on;
+                               else
+                                       bp->b_validoff = on + n;
+                       }
+                       /*
+                        * We now have a valid region which extends up to the
+                        * dirty region which we want.
+                        */
+                       if (on > bp->b_dirtyend)
+                               bp->b_dirtyend = on;
+                       else
+                               bp->b_dirtyoff = on + n;
+               }
                if (ISSET(bp->b_flags, B_ERROR)) {
                        error = bp->b_error;
                        brelse(bp);
                        return (error);
                }
-               if (bp->b_wcred == NOCRED) {
-            /*
-             * NFS has embedded ucred.
-             * Can not crhold() here as that causes zone corruption
-             */
+               /*
+                * NFS has embedded ucred so crhold() risks zone corruption
+                */
+               if (bp->b_wcred == NOCRED)
                        bp->b_wcred = crdup(cred);
-               }
                np->n_flag |= NMODIFIED;
 
                /*
@@ -636,168 +827,6 @@ again:
        return (0);
 }
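
The write path above (formerly nfs_getwriteblk) keeps at most one contiguous dirty range per buffer: when a new write does not touch the existing dirty bytes, it reads just enough from the server to bridge the gap between the valid data and the write, then widens the dirty range. The offset arithmetic, reduced to plain integers in a self-contained sketch (struct nbuf and extend_dirty are illustrative names, and the final widening approximates the bookkeeping nfs_write does after its uiomove):

    #include <stdio.h>

    /* Simplified buffer state: offsets are relative to the block start. */
    struct nbuf {
        long boff;                  /* file offset of the block */
        int  validoff, validend;    /* bytes known to match the server */
        int  dirtyoff, dirtyend;    /* bytes modified locally */
    };

    /*
     * Given a new write of len bytes at offset off within the block, report
     * the file range [start, end) that would be read to bridge the gap
     * between the valid data and the write, then extend the dirty range.
     */
    static void extend_dirty(struct nbuf *bp, int off, int len)
    {
        if (bp->dirtyend > 0 &&
            (off > bp->dirtyend || off + len < bp->dirtyoff)) {
            long start, end;

            if (off > bp->dirtyend) {   /* writing beyond the dirty range */
                start = bp->boff + bp->validend;
                end   = bp->boff + off;
            } else {                    /* writing before the dirty range */
                start = bp->boff + off + len;
                end   = bp->boff + bp->validoff;
            }
            if (end > start)
                printf("would read [%ld, %ld) to bridge the gap\n", start, end);

            if (off > bp->dirtyend)
                bp->dirtyend = off;
            else
                bp->dirtyoff = off + len;
        }
        /* The new write itself then lands inside [dirtyoff, dirtyend). */
        if (off < bp->dirtyoff || bp->dirtyend == 0)
            bp->dirtyoff = off;
        if (off + len > bp->dirtyend)
            bp->dirtyend = off + len;
        printf("dirty range now [%d, %d)\n", bp->dirtyoff, bp->dirtyend);
    }

    int main(void)
    {
        struct nbuf bp = { 8192, 0, 1024, 256, 512 };

        extend_dirty(&bp, 2048, 128);   /* a non-contiguous write */
        return 0;
    }
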
 
-/*
- * Get a cache block for writing.  The range to be written is
- * (off..off+len) within the block.  This routine ensures that the
- * block is either has no dirty region or that the given range is
- * contiguous with the existing dirty region.
- */
-static struct buf *
-nfs_getwriteblk(vp, bn, size, p, cred, off, len)
-       struct vnode *vp;
-       daddr_t bn;
-       int size;
-       struct proc *p;
-       struct ucred *cred;
-       int off, len;
-{
-       struct nfsnode *np = VTONFS(vp);
-       struct buf *bp;
-       int error;
-       struct iovec iov;
-       struct uio uio;
-       off_t boff;
-
- again:
-       bp = nfs_getcacheblk(vp, bn, size, p, BLK_WRITE);
-       if (!bp)
-               return (NULL);
-       if (bp->b_wcred == NOCRED) {
-               /*
-                * NFS has embedded ucred.
-                * Can not crhold() here as that causes zone corruption
-                */
-               bp->b_wcred = crdup(cred);
-       }
-
-       if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
-               bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
-       }
-
-       /*
-        * UBC doesn't (yet) handle partial pages so nfs_biowrite was
-        * hacked to never bdwrite, to start every little write right away.
-        * Running IE Avie noticed the performance problem, thus this code,
-        * which permits those delayed writes by ensuring an initial read
-        * of the entire page.  The read may hit eof ("short read") but
-        * that we will handle.
-        *
-        * We are quite dependant on the correctness of B_CACHE so check
-        * that first in case of problems.
-        */
-       if (!ISSET(bp->b_flags, B_CACHE) && len < PAGE_SIZE) {
-               struct nfsnode *np = VTONFS(vp);
-
-               boff = (off_t)bp->b_blkno * DEV_BSIZE;
-               uio.uio_iov = &iov;
-               uio.uio_iovcnt = 1;
-               uio.uio_offset = boff;
-               uio.uio_resid = PAGE_SIZE;
-               uio.uio_segflg = UIO_SYSSPACE;
-               uio.uio_rw = UIO_READ;
-               uio.uio_procp = p;
-               iov.iov_base = bp->b_data;
-               iov.iov_len = PAGE_SIZE;
-               error = nfs_readrpc(vp, &uio, cred);
-               if (error) {
-                       bp->b_error = error;
-                       SET(bp->b_flags, B_ERROR);
-                       printf("nfs_getwriteblk: readrpc returned %d", error);
-               }
-               if (uio.uio_resid > 0)
-                       bzero(iov.iov_base, uio.uio_resid);
-               bp->b_validoff = 0;
-               bp->b_validend = PAGE_SIZE - uio.uio_resid;
-               if (np->n_size > boff + bp->b_validend)
-                       bp->b_validend = min(np->n_size - boff, PAGE_SIZE);
-               bp->b_dirtyoff = 0;
-               bp->b_dirtyend = 0;
-       }
-
-       /*
-        * If the new write will leave a contiguous dirty
-        * area, just update the b_dirtyoff and b_dirtyend,
-        * otherwise try to extend the dirty region.
-        */
-       if (bp->b_dirtyend > 0 &&
-           (off > bp->b_dirtyend || (off + len) < bp->b_dirtyoff)) {
-               off_t start, end;
-
-               boff = (off_t)bp->b_blkno * DEV_BSIZE;
-               if (off > bp->b_dirtyend) {
-                       start = boff + bp->b_validend;
-                       end = boff + off;
-               } else {
-                       start = boff + off + len;
-                       end = boff + bp->b_validoff;
-               }
-               
-               /*
-                * It may be that the valid region in the buffer
-                * covers the region we want, in which case just
-                * extend the dirty region.  Otherwise we try to
-                * extend the valid region.
-                */
-               if (end > start) {
-                       uio.uio_iov = &iov;
-                       uio.uio_iovcnt = 1;
-                       uio.uio_offset = start;
-                       uio.uio_resid = end - start;
-                       uio.uio_segflg = UIO_SYSSPACE;
-                       uio.uio_rw = UIO_READ;
-                       uio.uio_procp = p;
-                       iov.iov_base = bp->b_data + (start - boff);
-                       iov.iov_len = end - start;
-                       error = nfs_readrpc(vp, &uio, cred);
-                       if (error) {
-                               /*
-                                * If we couldn't read, do not do a VOP_BWRITE
-                                 * as originally coded. That, could also error
-                                 * and looping back to "again" as it was doing
-                                 * could have us stuck trying to write same buffer
-                                 * again. nfs_write, will get the entire region
-                                 * if nfs_readrpc was successful. If not successful
-                                 * we should just error out. Errors like ESTALE
-                                 * would keep us in this loop rather than transient
-                                 * errors justifying a retry. We can return from here
-                                 * instead of altering dirty region later in routine.
-                                 * We did not write out old dirty region at this point.
-                                 */
-                                bp->b_error = error;
-                                SET(bp->b_flags, B_ERROR);
-                                printf("nfs_getwriteblk: readrpc (2) returned %d", error);
-                                return bp; 
-                       } else {
-                               /*
-                                * The read worked.
-                                */
-                               if (uio.uio_resid > 0) {
-                                       /*
-                                        * If there was a short read,
-                                        * just zero fill.
-                                        */
-                                       bzero(iov.iov_base,
-                                             uio.uio_resid);
-                               }
-                               if (off > bp->b_dirtyend)
-                                       bp->b_validend = off;
-                               else
-                                       bp->b_validoff = off + len;
-                       }
-               }
-
-               /*
-                * We now have a valid region which extends up to the
-                * dirty region which we want.
-                */
-               if (off > bp->b_dirtyend)
-                       bp->b_dirtyend = off;
-               else
-                       bp->b_dirtyoff = off + len;
-       }
-
-       return bp;
-}
 
 /*
  * Get an nfs cache block.
@@ -816,7 +845,7 @@ nfs_getcacheblk(vp, bn, size, p, operation)
 {
        register struct buf *bp;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
-        /*due to getblk/vm interractions, use vm page size or less values */
+       /* due to getblk/vm interactions, use the VM page size or smaller */
        int biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
 
        if (nmp->nm_flag & NFSMNT_INT) {
@@ -887,7 +916,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
                 * necessary. -- EKN
                 */
                if ((intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) ||
-                               ((error == EINTR) && current_thread_aborted())) {
+                   (error == EINTR && current_thread_aborted())) {
                        np->n_flag &= ~NFLUSHINPROG;
                        if (np->n_flag & NFLUSHWANT) {
                                np->n_flag &= ~NFLUSHWANT;
@@ -904,7 +933,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
        }
        didhold = ubc_hold(vp);
        if (didhold) {
-         (void) ubc_clean(vp, 1); /* get the pages out of vm also */
+               (void) ubc_clean(vp, 1); /* get the pages out of vm also */
                ubc_rele(vp);
        }
        return (0);
@@ -1053,7 +1082,6 @@ nfs_doio(bp, cr, p)
        struct iovec io;
 
        vp = bp->b_vp;
-       NFSTRACE(NFSTRC_DIO, vp);
        np = VTONFS(vp);
        nmp = VFSTONFS(vp->v_mount);
        uiop = &uio;
@@ -1068,7 +1096,7 @@ nfs_doio(bp, cr, p)
         * NFS being stateless, this case poses a problem.
         * By definition, the NFS server should always be consulted
         * for the data in that page.
-        * So we choose to clear the B_DONE and to the IO.
+        * So we choose to clear the B_DONE and to do the IO.
         *
         * XXX revisit this if there is a performance issue.
         * XXX In that case, we could play the attribute cache games ...
@@ -1078,13 +1106,10 @@ nfs_doio(bp, cr, p)
                        panic("nfs_doio: done and not async");
                CLR(bp->b_flags, B_DONE);
        }
-
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_START,
-                    (int)np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount, bp->b_flags, 0);
-
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 257)) | DBG_FUNC_NONE,
-                    bp->b_validoff, bp->b_validend, bp->b_dirtyoff, bp->b_dirtyend, 0);
-
+       FSDBG_TOP(256, np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount,
+                 bp->b_flags);
+       FSDBG(257, bp->b_validoff, bp->b_validend, bp->b_dirtyoff,
+             bp->b_dirtyend);
        /*
         * Historically, paging was done with physio, but no more.
         */
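
One quiet but important change in the buffer paths here (and in nfs_write above) is that a buffer's byte offset is computed as (off_t)bp->b_blkno * DEV_BSIZE, with the cast applied before the multiply; the older b_dirtyend clamp multiplied in plain 32-bit arithmetic, which truncates once byte offsets no longer fit in 32 bits. A tiny demonstration, using unsigned types so the wraparound is well defined (the kernel's daddr_t and off_t are the signed equivalents, and DEV_BSIZE is assumed to be 512):

    #include <stdio.h>
    #include <stdint.h>

    #define DEV_BSIZE 512u

    int main(void)
    {
        uint32_t blkno = 10000000;          /* a block ~4.8 GB into the file */

        /* The multiply happens in 32 bits and wraps before being widened. */
        uint64_t truncated = blkno * DEV_BSIZE;

        /* Promoting one operand first keeps the full 64-bit byte offset. */
        uint64_t correct = (uint64_t)blkno * DEV_BSIZE;

        printf("32-bit multiply: %llu\n", (unsigned long long)truncated);
        printf("64-bit multiply: %llu\n", (unsigned long long)correct);
        return 0;
    }
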
@@ -1095,7 +1120,7 @@ nfs_doio(bp, cr, p)
            io.iov_len = uiop->uio_resid = bp->b_bcount;
            /* mapping was done by vmapbuf() */
            io.iov_base = bp->b_data;
-           uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+           uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE;
            if (ISSET(bp->b_flags, B_READ)) {
                        uiop->uio_rw = UIO_READ;
                        nfsstats.read_physios++;
@@ -1118,14 +1143,11 @@ nfs_doio(bp, cr, p)
            uiop->uio_rw = UIO_READ;
            switch (vp->v_type) {
            case VREG:
-               uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+               uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE;
                nfsstats.read_bios++;
                error = nfs_readrpc(vp, uiop, cr);
-
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 262)) | DBG_FUNC_NONE,
-                            (int)np->n_size, bp->b_blkno * DEV_BSIZE, uiop->uio_resid, error, 0);
-
-
+               FSDBG(262, np->n_size, bp->b_blkno * DEV_BSIZE,
+                     uiop->uio_resid, error);
                if (!error) {
                    bp->b_validoff = 0;
                    if (uiop->uio_resid) {
@@ -1136,33 +1158,32 @@ nfs_doio(bp, cr, p)
                         * Just zero fill the rest of the valid area.
                         */
                        diff = bp->b_bcount - uiop->uio_resid;
-                       len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
-                               + diff);
-                               if (len > 0) {
-                                       len = min(len, uiop->uio_resid);
-                                       bzero((char *)bp->b_data + diff, len);
-                                       bp->b_validend = diff + len;
-
-                                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
-                                                    diff, len, 0, 1, 0);
-
-                               } else
-                                       bp->b_validend = diff;
+                       len = np->n_size - ((u_quad_t)bp->b_blkno * DEV_BSIZE +
+                                           diff);
+                       if (len > 0) {
+                               len = min(len, uiop->uio_resid);
+                               bzero((char *)bp->b_data + diff, len);
+                               bp->b_validend = diff + len;
+                               FSDBG(258, diff, len, 0, 1);
+                       } else
+                               bp->b_validend = diff;
                    } else
                                bp->b_validend = bp->b_bcount;
 #if 1 /* USV + JOE [ */
                    if (bp->b_validend < bp->b_bufsize) {
-                           /*
-                            * we're about to release a partial buffer after a read... the only
-                            * way we should get here is if this buffer contains the EOF
-                            * before releasing it, we'll zero out to the end of the buffer
-                            * so that if a mmap of this page occurs, we'll see zero's even
-                            * if a ftruncate extends the file in the meantime
+                           /*
+                            * we're about to release a partial buffer after a
+                            * read... the only way we should get here is if
+                            * this buffer contains the EOF.  Before releasing it,
+                            * we'll zero out to the end of the buffer so that
+                            * if an mmap of this page occurs, we'll see zeros
+                            * even if an ftruncate extends the file in the
+                            * meantime
                             */
-                           bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
-
-                           KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
-                                        bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 2, 0);
+                           bzero((caddr_t)(bp->b_data + bp->b_validend),
+                                 bp->b_bufsize - bp->b_validend);
+                           FSDBG(258, bp->b_validend,
+                                 bp->b_bufsize - bp->b_validend, 0, 2);
                    }
 #endif /* ] USV + JOE */
                }
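
When the read RPC above comes back short, b_validend is derived from the file size and the bytes actually returned, and everything past it is zero-filled so a later mmap of the page never exposes stale data. The same bookkeeping, reduced to a standalone sketch with made-up sizes:

    #include <stdio.h>
    #include <string.h>

    #define DEV_BSIZE 512

    int main(void)
    {
        long long n_size  = 10000;      /* file size as the client knows it */
        long long blkno   = 16;         /* buffer starts at byte 8192 */
        int bcount  = 4096;             /* bytes requested */
        int resid   = 2400;             /* bytes the RPC did not return */
        int bufsize = 4096;
        char data[4096];

        memset(data, 'x', sizeof data); /* pretend the read filled this much */

        int diff = bcount - resid;      /* bytes actually read: 1696 */
        long long len = n_size - (blkno * DEV_BSIZE + diff);
        int validend;

        if (len > 0) {
            /* Still inside the file: zero the gap up to EOF (or resid). */
            if (len > resid)
                len = resid;
            memset(data + diff, 0, (size_t)len);
            validend = diff + (int)len;
        } else {
            validend = diff;            /* read ended at or past EOF */
        }

        /* Partial buffer at EOF: zero out to the end so a later mmap sees
         * zeros even if the file is extended in the meantime. */
        if (validend < bufsize)
            memset(data + validend, 0, (size_t)(bufsize - validend));

        printf("valid bytes: [0, %d), zero-filled tail: %d bytes\n",
               validend, bufsize - validend);
        return 0;
    }
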
@@ -1196,28 +1217,40 @@ nfs_doio(bp, cr, p)
                        error = nfs_readdirrpc(vp, uiop, cr);
                break;
            default:
-               printf("nfs_doio:  type %x unexpected\n",vp->v_type);
+               printf("nfs_doio: type %x unexpected\n", vp->v_type);
                break;
            };
            if (error) {
-                       SET(bp->b_flags, B_ERROR);
-                       bp->b_error = error;
+               SET(bp->b_flags, B_ERROR);
+               bp->b_error = error;
            }
        } else {
-           if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
-               bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
+           /*
+            * mapped I/O may have altered any bytes, so we extend
+            * the dirty zone to the valid zone.  For best performance
+            * a better solution would be to save & restore page dirty bits
+            * around the uiomove which brings write-data into the buffer.
+            * Then here we'd check if the page is dirty rather than WASMAPPED.
+            * Also vnode_pager would change: if a page is clean it might
+            * still need to be written due to DELWRI.
+            */
+           if (UBCINFOEXISTS(vp) && ubc_issetflags(vp, UI_WASMAPPED)) {
+               bp->b_dirtyoff = min(bp->b_dirtyoff, bp->b_validoff);
+               bp->b_dirtyend = max(bp->b_dirtyend, bp->b_validend);
+           }
+           if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
+               bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
 
            if (bp->b_dirtyend > bp->b_dirtyoff) {
-
-               io.iov_len = uiop->uio_resid = bp->b_dirtyend
-                   - bp->b_dirtyoff;
-               uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
-                   + bp->b_dirtyoff;
+               io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
+               uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE +
+                                  bp->b_dirtyoff;
                io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
                uiop->uio_rw = UIO_WRITE;
 
                nfsstats.write_bios++;
-               if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
+               if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) ==
+                   B_ASYNC)
                    iomode = NFSV3WRITE_UNSTABLE;
                else
                    iomode = NFSV3WRITE_FILESYNC;
@@ -1228,7 +1261,6 @@ nfs_doio(bp, cr, p)
                else
                    CLR(bp->b_flags, B_NEEDCOMMIT);
                CLR(bp->b_flags, B_WRITEINPROG);
-
                /*
                 * For an interrupted write, the buffer is still valid
                 * and the write hasn't been pushed to the server yet,
@@ -1242,20 +1274,17 @@ nfs_doio(bp, cr, p)
                 * the block is reused. This is indicated by setting
                 * the B_DELWRI and B_NEEDCOMMIT flags.
                 */
-               if (error == EINTR
-                       || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
+               if (error == EINTR || (!error && bp->b_flags & B_NEEDCOMMIT)) {
                        int s;
 
-                       CLR(bp->b_flags, (B_INVAL|B_NOCACHE));
+                       CLR(bp->b_flags, B_INVAL | B_NOCACHE);
                        SET(bp->b_flags, B_DELWRI);
-                       
-                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 261)) | DBG_FUNC_NONE,
-                                    bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
-            
+                       FSDBG(261, bp->b_validoff, bp->b_validend,
+                             bp->b_bufsize, bp->b_bcount);
                        /*
-                        * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
-                        * buffer to the clean list, we have to reassign it back to the
-                        * dirty one. Ugh.
+                        * Since for the B_ASYNC case, nfs_bwrite() has
+                        * reassigned the buffer to the clean list, we have to
+                        * reassign it back to the dirty one. Ugh.
                         */
                        if (ISSET(bp->b_flags, B_ASYNC)) {
                                s = splbio();
@@ -1271,51 +1300,54 @@ nfs_doio(bp, cr, p)
                                np->n_flag |= NWRITEERR;
                        }
                        bp->b_dirtyoff = bp->b_dirtyend = 0;
-
 #if 1  /* JOE */
                        /*
-                        * validoff and validend represent the real data present in this buffer
-                        * if validoff is non-zero, than we have to invalidate the buffer and kill
-                        * the page when biodone is called... the same is also true when validend
-                        * doesn't extend all the way to the end of the buffer and validend doesn't
-                        * equate to the current EOF... eventually we need to deal with this in a 
-                        * more humane way (like keeping the partial buffer without making it immediately
-                        * available to the VM page cache).
+                        * validoff and validend represent the real data present
+                        * in this buffer.  If validoff is non-zero, then we have
+                        * to invalidate the buffer and kill the page when
+                        * biodone is called... the same is also true when
+                        * validend doesn't extend all the way to the end of the
+                        * buffer and validend doesn't equate to the current
+                        * EOF... eventually we need to deal with this in a more
+                        * humane way (like keeping the partial buffer without
+                        * making it immediately available to the VM page cache)
                         */
                        if (bp->b_validoff)
                                SET(bp->b_flags, B_INVAL);
                        else
                        if (bp->b_validend < bp->b_bufsize) {
-                               if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) == np->n_size) {
-                                       bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
-
-                                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 259)) | DBG_FUNC_NONE,
-                                                    bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 0, 0);;
-                               }
-                               else
-                                       SET(bp->b_flags, B_INVAL);
+                               if ((off_t)bp->b_blkno * DEV_BSIZE +
+                                   bp->b_validend == np->n_size) {
+                                       bzero((caddr_t)(bp->b_data +
+                                                       bp->b_validend),
+                                             bp->b_bufsize - bp->b_validend);
+                                       FSDBG(259, bp->b_validend,
+                                             bp->b_bufsize - bp->b_validend, 0,
+                                             0);
+                               } else
+                                       SET(bp->b_flags, B_INVAL);
                        }
 #endif
                }
 
            } else {
-
 #if 1  /* JOE */
-                       if (bp->b_validoff)
-                               SET(bp->b_flags, B_INVAL);
-                       else if (bp->b_validend < bp->b_bufsize) {
-                               if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) != np->n_size)
-                                        SET(bp->b_flags, B_INVAL);
-                       }
-                       if (bp->b_flags & B_INVAL) {
-                               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
-                                            bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
-                       }
+               if (bp->b_validoff ||
+                   (bp->b_validend < bp->b_bufsize &&
+                    (off_t)bp->b_blkno * DEV_BSIZE + bp->b_validend !=
+                    np->n_size)) {
+                       SET(bp->b_flags, B_INVAL);
+               }
+               if (bp->b_flags & B_INVAL) {
+                       FSDBG(260, bp->b_validoff, bp->b_validend,
+                             bp->b_bufsize, bp->b_bcount);
+               }
 #endif
-                       bp->b_resid = 0;
-                       biodone(bp);
-                       NFSTRACE(NFSTRC_DIO_DONE, vp);
-                       return (0);
+               bp->b_resid = 0;
+               biodone(bp);
+               FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bufsize,
+                         np->n_size);
+               return (0);
            }
        }
        bp->b_resid = uiop->uio_resid;
@@ -1323,13 +1355,11 @@ nfs_doio(bp, cr, p)
                nfs_clearcommit(vp->v_mount);
 
        if (bp->b_flags & B_INVAL) {
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
-                            bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
+               FSDBG(260, bp->b_validoff, bp->b_validend, bp->b_bufsize,
+                     bp->b_bcount);
        }
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_END,
-                    bp->b_validoff, bp->b_validend, bp->b_bcount, error, 0);
+       FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bcount, error);
 
        biodone(bp);
-       NFSTRACE(NFSTRC_DIO_DONE, vp);
        return (error);
 }
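
On the write side, nfs_doio now widens the dirty range to cover the whole valid range whenever the vnode has been memory-mapped, because stores through the mapping may have touched any valid byte, and then clamps the range so it never extends past the file size. The two adjustments as a standalone sketch (all values hypothetical):

    #include <stdio.h>

    #define DEV_BSIZE 512

    static int imin(int a, int b) { return a < b ? a : b; }
    static int imax(int a, int b) { return a > b ? a : b; }

    int main(void)
    {
        /* Hypothetical buffer state for a block starting at byte 8192. */
        long long n_size = 9000;            /* current file size */
        long long boff   = 8192;            /* (off_t)b_blkno * DEV_BSIZE */
        int validoff = 0,   validend = 4096;
        int dirtyoff = 512, dirtyend = 1024;
        int was_mapped = 1;                 /* UI_WASMAPPED equivalent */

        /* Mapped stores may have dirtied anything that is valid. */
        if (was_mapped) {
            dirtyoff = imin(dirtyoff, validoff);
            dirtyend = imax(dirtyend, validend);
        }

        /* Never push bytes past the end of the file. */
        if (boff + dirtyend > n_size)
            dirtyend = (int)(n_size - boff);

        printf("will write bytes [%d, %d) of the block\n", dirtyoff, dirtyend);
        return 0;
    }
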
index 383428171e7937d522f16f462ea30270819e2078..f12696b3616c54819976f82fa8a3cdb3206b5590 100644 (file)
@@ -206,9 +206,9 @@ loop:
        /*
         * Lock the new nfsnode.
         */
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 
-       return (0);
+       return (error);
 }
 
 int
@@ -253,18 +253,19 @@ nfs_inactive(ap)
                 } else if (vget(ap->a_vp, 0, ap->a_p))
                        panic("nfs_inactive: vget failed");
                (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+               np->n_size = 0;
                ubc_setsize(ap->a_vp, (off_t)0);
 
-                /* We have a problem. The dvp could have gone away on us
-                 * while in the unmount path. Thus it appears as VBAD and we
-                 * cannot use it. If we tried locking the parent (future), for silly
+                /* We have a problem. The dvp could have gone away on us while
+                 * in the unmount path. Thus it appears as VBAD and we cannot
+                 * use it. If we tried locking the parent (future), for silly
                  * rename files, it is unclear where we would lock. The unmount
                  * code just pulls unlocked vnodes as it goes thru its list and
                  * yanks them. Could unmount be smarter to see if a busy reg vnode has
                  * a parent, and not yank it yet? Put in more passes at unmount
-                 * time? In the meantime, just check if it went away on us. Could
-                 * have gone away during the nfs_vinvalbuf or ubc_setsize which block.
-                 * Or perhaps even before nfs_inactive got called.
+                 * time? In the meantime, just check if it went away on us.
+                 * Could have gone away during the nfs_vinvalbuf or ubc_setsize
+                 * which block.  Or perhaps even before nfs_inactive got called.
                  */
                 if ((sp->s_dvp)->v_type != VBAD) 
                         nfs_removeit(sp); /* uses the dvp */
@@ -339,57 +340,30 @@ nfs_reclaim(ap)
        return (0);
 }
 
-#if 0
 /*
  * Lock an nfsnode
  */
 int
 nfs_lock(ap)
        struct vop_lock_args /* {
-               struct vnode *a_vp;
+                struct vnode *a_vp;
+                int a_flags;
+                struct proc *a_p;
        } */ *ap;
 {
        register struct vnode *vp = ap->a_vp;
 
        /*
         * Ugh, another place where interruptible mounts will get hung.
-        * If you make this sleep interruptible, then you have to fix all
+        * If you make this call interruptible, then you have to fix all
         * the VOP_LOCK() calls to expect interruptibility.
         */
-       while (vp->v_flag & VXLOCK) {
-               vp->v_flag |= VXWANT;
-               (void) tsleep((caddr_t)vp, PINOD, "nfslck", 0);
-       }
        if (vp->v_tag == VT_NON)
-               return (ENOENT);
-
-#if 0
-       /*
-        * Only lock regular files.  If a server crashed while we were
-        * holding a directory lock, we could easily end up sleeping
-        * until the server rebooted while holding a lock on the root.
-        * Locks are only needed for protecting critical sections in
-        * VMIO at the moment.
-        * New vnodes will have type VNON but they should be locked
-        * since they may become VREG.  This is checked in loadattrcache
-        * and unwanted locks are released there.
-        */
-       if (vp->v_type == VREG || vp->v_type == VNON) {
-               while (np->n_flag & NLOCKED) {
-                       np->n_flag |= NWANTED;
-                       (void) tsleep((caddr_t) np, PINOD, "nfslck2", 0);
-                       /*
-                        * If the vnode has transmuted into a VDIR while we
-                        * were asleep, then skip the lock.
-                        */
-                       if (vp->v_type != VREG && vp->v_type != VNON)
-                               return (0);
-               }
-               np->n_flag |= NLOCKED;
-       }
-#endif
-
-       return (0);
+               return (ENOENT); /* ??? -- got to check something and error, but what? */
+        
+       return(lockmgr(&VTONFS(vp)->n_lock, ap->a_flags, &vp->v_interlock,
+                ap->a_p));
+       
 }
 
 /*
@@ -397,26 +371,16 @@ nfs_lock(ap)
  */
 int
 nfs_unlock(ap)
-       struct vop_unlock_args /* {
-               struct vnode *a_vp;
-       } */ *ap;
+        struct vop_unlock_args /* {
+                struct vnode *a_vp;
+                int a_flags;
+                struct proc *a_p;
+        } */ *ap;
 {
-#if 0
-       struct vnode* vp = ap->a_vp;
-        struct nfsnode* np = VTONFS(vp);
-
-       if (vp->v_type == VREG || vp->v_type == VNON) {
-               if (!(np->n_flag & NLOCKED))
-                       panic("nfs_unlock: nfsnode not locked");
-               np->n_flag &= ~NLOCKED;
-               if (np->n_flag & NWANTED) {
-                       np->n_flag &= ~NWANTED;
-                       wakeup((caddr_t) np);
-               }
-       }
-#endif
+        struct vnode *vp = ap->a_vp;
 
-       return (0);
+        return (lockmgr(&VTONFS(vp)->n_lock, ap->a_flags | LK_RELEASE,
+                &vp->v_interlock, ap->a_p));
 }
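
The nfs_lock/nfs_unlock pair above (and the nfs_islocked query in the next hunk) now simply forward to the nfsnode's n_lock via lockmgr instead of the old hand-rolled NLOCKED flag. A userland analogue of that delegation pattern, using a pthread mutex as an illustrative stand-in for the kernel lock manager:

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in for struct nfsnode with its embedded n_lock. */
    struct node {
        pthread_mutex_t n_lock;
        int locked;                 /* for the islocked query */
    };

    static int node_lock(struct node *np)
    {
        int error = pthread_mutex_lock(&np->n_lock);
        if (!error)
            np->locked = 1;
        return error;
    }

    static int node_unlock(struct node *np)
    {
        np->locked = 0;
        return pthread_mutex_unlock(&np->n_lock);
    }

    static int node_islocked(struct node *np)
    {
        return np->locked;
    }

    int main(void)
    {
        struct node n = { PTHREAD_MUTEX_INITIALIZER, 0 };

        node_lock(&n);
        printf("islocked: %d\n", node_islocked(&n));
        node_unlock(&n);
        printf("islocked: %d\n", node_islocked(&n));
        return 0;
    }
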
 
 /*
@@ -428,9 +392,10 @@ nfs_islocked(ap)
                struct vnode *a_vp;
        } */ *ap;
 {
-       return VTONFS(ap->a_vp)->n_flag & NLOCKED ? 1 : 0;
+       return (lockstatus(&VTONFS(ap->a_vp)->n_lock));
+
 }
-#endif
+
 
 /*
  * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually
index 3f3d51ef053a0b0c3c3c226c44aedd45b669e445..758242702bf83bdd8d7a997fb065722b9fac05d8 100644 (file)
@@ -900,6 +900,7 @@ nqnfs_getlease(vp, rwflag, cred, p)
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int cachable;
        u_quad_t frev;
+       u_int64_t xid;
 
        nfsstats.rpccnt[NQNFSPROC_GETLEASE]++;
        mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED,
@@ -909,7 +910,7 @@ nqnfs_getlease(vp, rwflag, cred, p)
        *tl++ = txdr_unsigned(rwflag);
        *tl = txdr_unsigned(nmp->nm_leaseterm);
        reqtime = time.tv_sec;
-       nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred);
+       nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred, &xid);
        np = VTONFS(vp);
        nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
        cachable = fxdr_unsigned(int, *tl++);
@@ -917,7 +918,7 @@ nqnfs_getlease(vp, rwflag, cred, p)
        if (reqtime > time.tv_sec) {
                fxdr_hyper(tl, &frev);
                nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev);
-               nfsm_loadattr(vp, (struct vattr *)0);
+               nfsm_loadattr(vp, (struct vattr *)0, &xid);
        } else
                error = NQNFS_EXPIRED;
        nfsm_reqdone;
index 30b1127695a78e7e40a65e141eb202949248be0e..f03085e4e0b5b43ad3d470403be168eddb174014 100644 (file)
@@ -2648,10 +2648,16 @@ again:
        io.uio_rw = UIO_READ;
        io.uio_procp = (struct proc *)0;
        eofflag = 0;
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
-       if (cookies) {
-               _FREE((caddr_t)cookies, M_TEMP);
-               cookies = NULL;
+
+        if (cookies) {
+                _FREE((caddr_t)cookies, M_TEMP);
+                cookies = NULL;
+        }
+       if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) {
+               FREE((caddr_t)rbuf, M_TEMP);
+               nfsm_reply(NFSX_POSTOPATTR(v3));
+               nfsm_srvpostop_attr(getret, &at);
+               return (0);
        }
        error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
        off = (off_t)io.uio_offset;
@@ -2922,11 +2928,16 @@ again:
        io.uio_rw = UIO_READ;
        io.uio_procp = (struct proc *)0;
        eofflag = 0;
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
        if (cookies) {
-               _FREE((caddr_t)cookies, M_TEMP);
-               cookies = NULL;
-       }
+                _FREE((caddr_t)cookies, M_TEMP);
+                cookies = NULL;
+        }       
+        if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) {
+                FREE((caddr_t)rbuf, M_TEMP);
+                nfsm_reply(NFSX_V3POSTOPATTR);
+                nfsm_srvpostop_attr(getret, &at);
+                return (0);
+        }
        error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
        off = (u_quad_t)io.uio_offset;
        getret = VOP_GETATTR(vp, &at, cred, procp);
index 9c331ce9fa80ceedf6b5b79cb3ccfd46d16b2ff2..9aaf35b993d63586179fba4d2f23adfbf0c5f62a 100644 (file)
 #include <nfs/nfsrtt.h>
 #include <nfs/nqnfs.h>
 
+#include <sys/kdebug.h>
+
+#define FSDBG(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+
 #define        TRUE    1
 #define        FALSE   0
 
@@ -128,6 +140,7 @@ extern time_t nqnfsstarttime;
 extern struct nfsstats nfsstats;
 extern int nfsv3_procid[NFS_NPROCS];
 extern int nfs_ticks;
+extern u_long nfs_xidwrap;
 
 /*
  * Defines which timer to use for the procnum.
@@ -203,6 +216,11 @@ int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
 };
 #endif /* NFS_NOSERVER */
 
+/*
+ * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG),
+ * but some of this code may prove useful someday...
+ */
+#undef NFSDIAG
 #if NFSDIAG
 int nfstraceindx = 0;
 struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
@@ -470,8 +488,7 @@ nfs_connect(nmp, rep)
                nmp->nm_sdrtt[3] = 0;
        nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
        nmp->nm_sent = 0;
-       NFSTRACE4(NFSTRC_CWND_INIT, nmp, nmp->nm_flag, nmp->nm_soflags,
-                 nmp->nm_cwnd);
+       FSDBG(529, nmp, nmp->nm_flag, nmp->nm_soflags, nmp->nm_cwnd);
        nmp->nm_timeouts = 0;
        return (0);
 
@@ -956,25 +973,23 @@ nfs_reply(myrep)
                 */
                if (myrep->r_mrep != NULL) {
                        nfs_rcvunlock(&nmp->nm_flag);
-                       NFSTRACE4(NFSTRC_RCVALREADY, myrep->r_xid, myrep,
-                                 myrep->r_nmp, 2);
+                       FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1);
                        return (0);
                }
                /*
                 * Get the next Rpc reply off the socket. Assume myrep->r_nmp
-                * is still in tact by checks done in nfs_rcvlock.
+                * is still intact by checks done in nfs_rcvlock.
                 */
                error = nfs_receive(myrep, &nam, &mrep);
                /*
                 * Bailout asap if nfsmount struct gone (unmounted). 
                 */
                if (!myrep->r_nmp) {
-                       NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 2);
+                       FSDBG(530, myrep->r_xid, myrep, nmp, -2);
                        return (ECONNABORTED);
                }
                if (error) {
-                       NFSTRACE4(NFSTRC_RCVERR, myrep->r_xid, myrep, nmp,
-                                 error);
+                       FSDBG(530, myrep->r_xid, myrep, nmp, error);
                        nfs_rcvunlock(&nmp->nm_flag);
 
                        /*
@@ -1005,7 +1020,7 @@ nfs_reply(myrep)
                  * just check here and get out. (ekn)
                 */
                if (!mrep) {
-                        NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 3);
+                        FSDBG(530, myrep->r_xid, myrep, nmp, -3);
                         return (ECONNABORTED); /* sounds good */
                 }
                         
@@ -1073,8 +1088,8 @@ nfsmout:
                                 * Do the additive increase of
                                 * one rpc/rtt.
                                 */
-                               NFSTRACE4(NFSTRC_CWND_REPLY, rep->r_xid, rep,
-                                         nmp->nm_sent, nmp->nm_cwnd);
+                               FSDBG(530, rep->r_xid, rep, nmp->nm_sent,
+                                     nmp->nm_cwnd);
                                if (nmp->nm_cwnd <= nmp->nm_sent) {
                                        nmp->nm_cwnd +=
                                           (NFS_CWNDSCALE * NFS_CWNDSCALE +
@@ -1127,8 +1142,8 @@ nfsmout:
                                panic("nfs_reply: nil r_mrep");
                        return (0);
                }
-               NFSTRACE4(NFSTRC_NOTMINE, myrep->r_xid, myrep, rep,
-                         rep ? rep->r_xid : myrep->r_flags);
+               FSDBG(530, myrep->r_xid, myrep, rep,
+                     rep ? rep->r_xid : myrep->r_flags);
                if (myrep->r_flags & R_GETONEREP)
                        return (0); /* this path used by NQNFS */
        }
@@ -1145,7 +1160,7 @@ nfsmout:
  * nb: always frees up mreq mbuf list
  */
 int
-nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp)
        struct vnode *vp;
        struct mbuf *mrest;
        int procnum;
@@ -1154,6 +1169,7 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
        struct mbuf **mrp;
        struct mbuf **mdp;
        caddr_t *dposp;
+       u_int64_t *xidp;
 {
        register struct mbuf *m, *mrep;
        register struct nfsreq *rep, *rp;
@@ -1173,10 +1189,12 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
        char *auth_str, *verf_str;
        NFSKERBKEY_T key;               /* save session key */
 
+       if (xidp)
+               *xidp = 0;
        nmp = VFSTONFS(vp->v_mount);
        MALLOC_ZONE(rep, struct nfsreq *,
                    sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
-       NFSTRACE4(NFSTRC_REQ, vp, procnum, nmp, rep);
+       FSDBG_TOP(531, vp, procnum, nmp, rep);
 
        /*
         * make sure if we blocked above, that the file system didn't get
@@ -1189,7 +1207,7 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
         */
 
        if (vp->v_type == VBAD) {
-               NFSTRACE4(NFSTRC_VBAD, 1, vp, nmp, rep);
+               FSDBG_BOT(531, 1, vp, nmp, rep);
                _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
                return (EINVAL);
        }
@@ -1220,6 +1238,7 @@ kerbauth:
                        error = nfs_getauth(nmp, rep, cred, &auth_str,
                                &auth_len, verf_str, &verf_len, key);
                        if (error) {
+                               FSDBG_BOT(531, 2, vp, error, rep);
                                _FREE_ZONE((caddr_t)rep,
                                        sizeof (struct nfsreq), M_NFSREQ);
                                m_freem(mrest);
@@ -1236,6 +1255,8 @@ kerbauth:
        }
        m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
             auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
+       if (xidp)
+               *xidp = xid + ((u_int64_t)nfs_xidwrap << 32);
        if (auth_str)
                _FREE(auth_str, M_TEMP);
 
@@ -1293,8 +1314,8 @@ tryagain:
                 */
                if (!error) {
                        if ((rep->r_flags & R_MUSTRESEND) == 0) {
-                               NFSTRACE4(NFSTRC_CWND_REQ1, rep->r_xid, rep,
-                                         nmp->nm_sent, nmp->nm_cwnd);
+                               FSDBG(531, rep->r_xid, rep, nmp->nm_sent,
+                                     nmp->nm_cwnd);
                                nmp->nm_sent += NFS_CWNDSCALE;
                                rep->r_flags |= R_SENT;
                        }
@@ -1336,8 +1357,7 @@ tryagain:
         * Decrement the outstanding request count.
         */
        if (rep->r_flags & R_SENT) {
-               NFSTRACE4(NFSTRC_CWND_REQ2, rep->r_xid, rep, nmp->nm_sent,
-                         nmp->nm_cwnd);
+               FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd);
                rep->r_flags &= ~R_SENT;        /* paranoia */
                nmp->nm_sent -= NFS_CWNDSCALE;
        }
@@ -1354,7 +1374,7 @@ tryagain:
        dpos = rep->r_dpos;
        if (error) {
                m_freem(rep->r_mreq);
-               NFSTRACE4(NFSTRC_REQERR, error, rep->r_xid, nmp, rep);
+               FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
                _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
                return (error);
        }
@@ -1379,7 +1399,7 @@ tryagain:
                        error = EACCES;
                m_freem(mrep);
                m_freem(rep->r_mreq);
-               NFSTRACE4(NFSTRC_RPCERR, error, rep->r_xid, nmp, rep);
+               FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
                _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
                return (error);
        }
@@ -1434,8 +1454,7 @@ tryagain:
                        } else
                                m_freem(mrep);
                        m_freem(rep->r_mreq);
-                       NFSTRACE4(NFSTRC_DISSECTERR, error, rep->r_xid, nmp,
-                                 rep);
+                       FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
                        _FREE_ZONE((caddr_t)rep,
                                   sizeof (struct nfsreq), M_NFSREQ);
                        return (error);
@@ -1463,7 +1482,7 @@ tryagain:
                *mdp = md;
                *dposp = dpos;
                m_freem(rep->r_mreq);
-               NFSTRACE4(NFSTRC_REQFREE, 0xf0f0f0f0, rep->r_xid, nmp, rep);
+               FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep);
                FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
                return (0);
        }
@@ -1471,7 +1490,7 @@ tryagain:
        error = EPROTONOSUPPORT;
 nfsmout:
        m_freem(rep->r_mreq);
-       NFSTRACE4(NFSTRC_REQFREE, error, rep->r_xid, nmp, rep);
+       FSDBG_BOT(531, error, rep->r_xid, nmp, rep);
        _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
        return (error);
 }
@@ -1645,8 +1664,8 @@ nfs_softterm(struct nfsreq *rep)
 {
        rep->r_flags |= R_SOFTTERM;
        if (rep->r_flags & R_SENT) {
-               NFSTRACE4(NFSTRC_CWND_SOFT, rep->r_xid, rep,
-                         rep->r_nmp->nm_sent, rep->r_nmp->nm_cwnd);
+               FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent,
+                     rep->r_nmp->nm_cwnd);
                rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
                rep->r_flags &= ~R_SENT;
        }
@@ -1816,8 +1835,7 @@ rescan:
                                rep->r_flags |= R_SENT;
                                nmp->nm_sent += NFS_CWNDSCALE;
                        }
-                       NFSTRACE4(NFSTRC_CWND_TIMER, xid, rep,
-                                 nmp->nm_sent, nmp->nm_cwnd);
+                       FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd);
 
                        thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 
@@ -1830,7 +1848,7 @@ rescan:
 
                        thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
 
-                       NFSTRACE4(NFSTRC_CWND_TIMER, xid, error, sent, cwnd);
+                       FSDBG(535, xid, error, sent, cwnd);
                        /*
                         * This is to fix "nfs_sigintr" DSI panics.
                         * We may have slept during the send so the current
@@ -1976,24 +1994,23 @@ nfs_rcvlock(rep)
        register int *flagp = &rep->r_nmp->nm_flag;
        int slpflag, slptimeo = 0;
 
+       FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *flagp);
        if (*flagp & NFSMNT_INT)
                slpflag = PCATCH;
        else
                slpflag = 0;
        while (*flagp & NFSMNT_RCVLOCK) {
                if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
-                       NFSTRACE4(NFSTRC_RCVLCKINTR, rep->r_xid, rep,
-                                 rep->r_nmp, *flagp);
+                       FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100);
                        return (EINTR);
                } else if (rep->r_mrep != NULL) {
                        /*
                         * Don't bother sleeping if reply already arrived
                         */
-                       NFSTRACE4(NFSTRC_RCVALREADY, rep->r_xid, rep,
-                                 rep->r_nmp, 1);
+                       FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101);
                        return (EALREADY);
                }
-               NFSTRACE4(NFSTRC_RCVLCKW, rep->r_xid, rep, rep->r_nmp, *flagp);
+               FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102);
                *flagp |= NFSMNT_WANTRCV;
                (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
                              slptimeo);
@@ -2003,16 +2020,18 @@ nfs_rcvlock(rep)
                }
                /*
                 * Make sure while we slept that the mountpoint didn't go away.
-                * nfs_sigintr and caller nfs_reply expect it in tact.
+                * nfs_sigintr and caller nfs_reply expect it intact.
                 */
-               if (!rep->r_nmp) 
+               if (!rep->r_nmp)  {
+                       FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103);
                        return (ECONNABORTED); /* don't have lock until out of loop */
+               }
        }
        /*
         * nfs_reply will handle it if reply already arrived.
         * (We may have slept or been preempted while on network funnel).
         */
-       NFSTRACE4(NFSTRC_RCVLCK, rep->r_xid, rep, rep->r_nmp, *flagp);
+       FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *flagp);
        *flagp |= NFSMNT_RCVLOCK;
        return (0);
 }
@@ -2025,15 +2044,13 @@ nfs_rcvunlock(flagp)
        register int *flagp;
 {
 
+       FSDBG(533, flagp, *flagp, 0, 0);
        if ((*flagp & NFSMNT_RCVLOCK) == 0)
                panic("nfs rcvunlock");
        *flagp &= ~NFSMNT_RCVLOCK;
        if (*flagp & NFSMNT_WANTRCV) {
-               NFSTRACE(NFSTRC_RCVUNLW, flagp);
                *flagp &= ~NFSMNT_WANTRCV;
                wakeup((caddr_t)flagp);
-       } else {
-               NFSTRACE(NFSTRC_RCVUNL, flagp);
        }
 }
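
Stripped of the tracing, signal, and mount-teardown checks above, nfs_rcvlock()/nfs_rcvunlock() are a classic flag-based sleep lock on the mount's flag word. A condensed sketch of the acquire/release pairing (illustration only, not part of this change):

	/* acquire: sleep while another thread holds the receive lock */
	while (*flagp & NFSMNT_RCVLOCK) {
		*flagp |= NFSMNT_WANTRCV;	/* ask to be woken */
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1),
		              "nfsrcvlk", slptimeo);
	}
	*flagp |= NFSMNT_RCVLOCK;

	/* release: drop the lock bit and wake any waiter */
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
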
 
index 9018b50a622c9ab7baddc08459b303bbac8cf0f1..e152a0d18c3254773ca3e2bea3b034c5508d9dd4 100644 (file)
 #include <netiso/iso.h>
 #endif
 
+#include <sys/kdebug.h>
+
+#define FSDBG(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
 /*
  * Data items converted to xdr at startup, since they are constant
  * This is kinda hokey, but may save a little time doing byte swaps
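
The FSDBG macros added above are thin wrappers around KERNEL_DEBUG: each call emits one kdebug event in the DBG_FSRW class, with FSDBG_TOP/FSDBG_BOT marking entry and exit (DBG_FUNC_START/DBG_FUNC_END) and plain FSDBG a point event. A usage sketch; the routine, helper, and code number 599 are made up for illustration (this commit itself uses codes in the 512-535 range):

	int
	nfs_example_op(struct vnode *vp)
	{
		int error;

		FSDBG_TOP(599, vp, 0, 0, 0);	 /* entry event */
		error = nfs_do_work(vp);	 /* hypothetical helper */
		FSDBG_BOT(599, error, vp, 0, 0); /* paired exit event */
		return (error);
	}
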
@@ -116,6 +127,7 @@ u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false;
 
 /* And other global data */
 static u_long nfs_xid = 0;
+u_long nfs_xidwrap = 0;                /* to build a (non-wrapping) 64 bit xid */

 static enum vtype nv2tov_type[8]= {
        VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON,  VNON 
 };
@@ -691,8 +703,10 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len,
        /*
         * Skip zero xid if it should ever happen.
         */
-       if (++nfs_xid == 0)
+       if (++nfs_xid == 0) {
+               nfs_xidwrap++;
                nfs_xid++;
+       }
 
        *tl++ = *xidp = txdr_unsigned(nfs_xid);
        *tl++ = rpc_call;
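
Together with the `*xidp = xid + ((u_int64_t)nfs_xidwrap << 32)` line added to nfs_request() earlier in this commit, the wrap counter gives every RPC a 64-bit transaction id that never repeats. A minimal sketch of the idea, assuming a 32-bit u_long and ignoring the txdr_unsigned() byte-order conversion applied to the copy that goes on the wire (hypothetical helper, not in the source):

	static u_int64_t
	nfs_next_xid64(void)
	{
		if (++nfs_xid == 0) {		/* skip zero, count the wrap */
			nfs_xidwrap++;
			nfs_xid++;
		}
		/* high word: wrap count, low word: 32-bit xid */
		return (((u_int64_t)nfs_xidwrap << 32) + nfs_xid);
	}

nfs_loadattrcache() can then compare these 64-bit values directly to decide whether a reply's attributes are newer than the ones already applied to the nfsnode.
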
@@ -1227,11 +1241,13 @@ nfs_init(vfsp)
  *    copy the attributes to *vaper
  */
 int
-nfs_loadattrcache(vpp, mdp, dposp, vaper)
+nfs_loadattrcache(vpp, mdp, dposp, vaper, dontshrink, xidp)
        struct vnode **vpp;
        struct mbuf **mdp;
        caddr_t *dposp;
        struct vattr *vaper;
+       int dontshrink;
+       u_int64_t *xidp;
 {
        register struct vnode *vp = *vpp;
        register struct vattr *vap;
@@ -1247,18 +1263,24 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
        struct vnode *nvp;
        int v3;
 
-        /* this routine is a good place to check for VBAD again. We caught most of them
-         * in nfsm_request, but postprocessing may indirectly get here, so check again.
-         */
-        if (vp->v_type==VBAD)
-            return (EINVAL); 
-            
-        v3 = NFS_ISV3(vp);
-       NFSTRACE(NFSTRC_LAC, vp);
+       FSDBG_TOP(527, vp, 0, *xidp >> 32, *xidp);
+       /*
+        * this routine is a good place to check for VBAD again. We caught
+        * most of them in nfsm_request, but postprocessing may indirectly get
+        * here, so check again.
+        */
+       if (vp->v_type == VBAD) {
+               FSDBG_BOT(527, EINVAL, 1, 0, *xidp);
+               return (EINVAL); 
+       }
+
+       v3 = NFS_ISV3(vp);
        md = *mdp;
        t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
-       if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)))
+       if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2))) {
+               FSDBG_BOT(527, error, 2, 0, *xidp);
                return (error);
+       }
        fp = (struct nfs_fattr *)cp2;
        if (v3) {
                vtyp = nfsv3tov_type(fp->fa_type);
@@ -1308,12 +1330,30 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
         * information.
         */
        np = VTONFS(vp);
+       if (*xidp < np->n_xid) {
+               /*
+                * We have already updated attributes with a response from
+                * a later request.  The attributes we have here are probably
+                * stale so we drop them (just return).  However, our 
+                * out-of-order receipt could be correct - if the requests were
+                * processed out of order at the server.  Given the uncertainty
+                * we invalidate our cached attributes.  *xidp is zeroed here
+                * to indicate the attributes were dropped - only getattr
+                * cares - it needs to retry the rpc.
+                */
+               np->n_attrstamp = 0;
+               FSDBG_BOT(527, 0, np, np->n_xid, *xidp);
+               *xidp = 0;
+               return (0);
+       }
        if (vp->v_type != vtyp) {
                vp->v_type = vtyp;
 
                if (UBCINFOMISSING(vp) || UBCINFORECLAIMED(vp))
-                       if (error = ubc_info_init(vp)) /* VREG */
+                       if ((error = ubc_info_init(vp))) { /* VREG */
+                               FSDBG_BOT(527, error, 3, 0, *xidp);
                                return(error);
+                       }
 
                if (vp->v_type == VFIFO) {
                        vp->v_op = fifo_nfsv2nodeop_p;
@@ -1342,8 +1382,9 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
                        }
                }
                np->n_mtime = mtime.tv_sec;
-               NFSTRACE(NFSTRC_LAC_INIT, vp);
+               FSDBG(527, vp, np->n_mtime, 0, 0);
        }
+       np->n_xid = *xidp;
        vap = &np->n_vattr;
        vap->va_type = vtyp;
        vap->va_mode = (vmode & 07777);
@@ -1378,15 +1419,15 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
                vap->va_filerev = 0;
        }
 
+       np->n_attrstamp = time.tv_sec;
        if (vap->va_size != np->n_size) {
-               NFSTRACE4(NFSTRC_LAC_NP, vp, vap->va_size, np->n_size,
-                         (vap->va_type == VREG) |
-                         (np->n_flag & NMODIFIED ? 2 : 0));
+               FSDBG(527, vp, vap->va_size, np->n_size,
+                     (vap->va_type == VREG) |
+                     (np->n_flag & NMODIFIED ? 6 : 4));
                if (vap->va_type == VREG) {
-                       int orig_size;
+                       int orig_size;
 
                        orig_size = np->n_size;
-
                        if (np->n_flag & NMODIFIED) {
                                if (vap->va_size < np->n_size)
                                        vap->va_size = np->n_size;
@@ -1394,13 +1435,16 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
                                        np->n_size = vap->va_size;
                        } else
                                np->n_size = vap->va_size;
-                       if (UBCISVALID(vp) && np->n_size > orig_size)
-                               ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+                       if (dontshrink && UBCISVALID(vp) &&
+                           np->n_size < ubc_getsize(vp)) {
+                               vap->va_size = np->n_size = orig_size;
+                               np->n_attrstamp = 0;
+                       } else
+                               ubc_setsize(vp, (off_t)np->n_size); /* XXX */
                } else
                        np->n_size = vap->va_size;
        }
 
-       np->n_attrstamp = time.tv_sec;
        if (vaper != NULL) {
                bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
                if (np->n_flag & NCHG) {
@@ -1410,6 +1454,7 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
                                vaper->va_mtime = np->n_mtim;
                }
        }
+       FSDBG_BOT(527, 0, np, 0, *xidp);
        return (0);
 }
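
The new dontshrink argument keeps a reply from shrinking the mapped (UBC) size out from under the caller: if the server reports a size smaller than what ubc_getsize() says is mapped, the old size is kept and the attribute cache is invalidated so fresh attributes are fetched later. A condensed form of the size handling above (illustration only, same variables as the function):

	if (vap->va_size != np->n_size && vap->va_type == VREG) {
		orig_size = np->n_size;
		if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size)
			vap->va_size = np->n_size; /* dirty: keep larger local size */
		else
			np->n_size = vap->va_size;
		if (dontshrink && UBCISVALID(vp) &&
		    np->n_size < ubc_getsize(vp)) {
			/* refuse to shrink the mapping; recheck attrs later */
			vap->va_size = np->n_size = orig_size;
			np->n_attrstamp = 0;
		} else
			ubc_setsize(vp, (off_t)np->n_size);
	}
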
 
@@ -1427,23 +1472,19 @@ nfs_getattrcache(vp, vaper)
        register struct vattr *vap;
 
        if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
-               NFSTRACE(NFSTRC_GAC_MISS, vp);
+               FSDBG(528, vp, 0, 0, 1);
                nfsstats.attrcache_misses++;
                return (ENOENT);
        }
-       NFSTRACE(NFSTRC_GAC_HIT, vp);
+       FSDBG(528, vp, 0, 0, 2);
        nfsstats.attrcache_hits++;
        vap = &np->n_vattr;
 
        if (vap->va_size != np->n_size) {
-               NFSTRACE4(NFSTRC_GAC_NP, vp, vap->va_size, np->n_size,
-                         (vap->va_type == VREG) |
-                         (np->n_flag & NMODIFIED ? 2 : 0));
+               FSDBG(528, vp, vap->va_size, np->n_size,
+                     (vap->va_type == VREG) |
+                     (np->n_flag & NMODIFIED ? 6 : 4));
                if (vap->va_type == VREG) {
-                       int orig_size;
-
-                       orig_size = np->n_size;
-
                        if (np->n_flag & NMODIFIED) {
                                if (vap->va_size < np->n_size)
                                        vap->va_size = np->n_size;
@@ -1451,8 +1492,7 @@ nfs_getattrcache(vp, vaper)
                                        np->n_size = vap->va_size;
                        } else
                                np->n_size = vap->va_size;
-                       if (UBCISVALID(vp) && np->n_size > orig_size)
-                               ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+                       ubc_setsize(vp, (off_t)np->n_size); /* XXX */
                } else
                        np->n_size = vap->va_size;
        }
index 9c1c7caadd8cfcdc847d7955f962d82e0a265de8..8281df11136ae95dde50c3cac86999538049fa29 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  *
  *     @(#)nfs_vfsops.c        8.12 (Berkeley) 5/20/95
  * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
- *
- *  History:
- *
- *
- *  23-May-97  Umesh Vaishampayan  (umeshv@apple.com)
- *     Added the ability to mount "/private" separately.
- *     Fixed bug which caused incorrect reporting of "mounted on"
- *     directory name in case of nfs root.
  */
 
 #include <sys/param.h>
@@ -228,12 +220,8 @@ static int nfs_iosize(nmp)
         * space.
         */
        iosize = max(nmp->nm_rsize, nmp->nm_wsize);
-       if (iosize < PAGE_SIZE) iosize = PAGE_SIZE;
-#if 0
-       /* XXX UPL changes for UBC do not support multiple pages */
-       iosize = PAGE_SIZE; /* XXX FIXME */
-#endif
-        /* return iosize; */
+       if (iosize < PAGE_SIZE)
+               iosize = PAGE_SIZE;
        return (trunc_page(iosize));
 }
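
The cleaned-up nfs_iosize() now simply reports the larger of the negotiated read and write sizes, raised to at least one page and truncated to a page boundary. A worked example with illustrative mount values (4 KB pages assumed):

	/*
	 *   nm_rsize = 8192,  nm_wsize = 32768
	 *     iosize = max(8192, 32768)           -> 32768
	 *     clamp  = (iosize < PAGE_SIZE) ? ... -> 32768 (unchanged)
	 *     result = trunc_page(32768)          -> 32768
	 *
	 *   A server granting only nm_rsize = nm_wsize = 1024 would be
	 *   raised to PAGE_SIZE first, so the result would be 4096.
	 */
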
 
@@ -282,12 +270,14 @@ nfs_statfs(mp, sbp, p)
        struct ucred *cred;
        u_quad_t tquad;
        extern int nfs_mount_type;
+       u_int64_t xid;
 
 #ifndef nolint
        sfp = (struct nfs_statfs *)0;
 #endif
        vp = nmp->nm_dvp;
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p))
+               return(error);
        cred = crget();
        cred->cr_ngroups = 1;
        if (v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0)
@@ -295,9 +285,9 @@ nfs_statfs(mp, sbp, p)
        nfsstats.rpccnt[NFSPROC_FSSTAT]++;
        nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
        nfsm_fhtom(vp, v3);
-       nfsm_request(vp, NFSPROC_FSSTAT, p, cred);
+       nfsm_request(vp, NFSPROC_FSSTAT, p, cred, &xid);
        if (v3)
-               nfsm_postop_attr(vp, retattr);
+               nfsm_postop_attr(vp, retattr, &xid);
        nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
 
 /* XXX CSM 12/2/97 Cleanup when/if we integrate FreeBSD mount.h */
@@ -355,12 +345,13 @@ nfs_fsinfo(nmp, vp, cred, p)
        caddr_t bpos, dpos, cp2;
        int error = 0, retattr;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_FSINFO]++;
        nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
        nfsm_fhtom(vp, 1);
-       nfsm_request(vp, NFSPROC_FSINFO, p, cred);
-       nfsm_postop_attr(vp, retattr);
+       nfsm_request(vp, NFSPROC_FSINFO, p, cred, &xid);
+       nfsm_postop_attr(vp, retattr, &xid);
        if (!error) {
                nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
                pref = fxdr_unsigned(u_long, fsp->fs_wtpref);
@@ -562,8 +553,8 @@ nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp)
                /* Get the vnode for '/'. Set fdp->fd_cdir to reference it. */
                if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
                        panic("cannot find root vnode");
+               VREF(rootvnode);
                fdp->fd_cdir = rootvnode;
-               VREF(fdp->fd_cdir);
                VOP_UNLOCK(rootvnode, 0, procp);
                fdp->fd_rdir = NULL;
        }
@@ -947,13 +938,8 @@ nfs_unmount(mp, mntflags, p)
         * - Decrement reference on the vnode representing remote root.
         * - Close the socket
         * - Free up the data structures
-        *
-        * We need to decrement the ref. count on the nfsnode representing
-        * the remote root.  See comment in mountnfs().  The VFS unmount()
-        * has done vput on this vnode, otherwise we would get deadlock!
         */
        vp = nmp->nm_dvp;
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
        
        /*
         * Must handshake with nqnfs_clientd() if it is active.
@@ -968,10 +954,9 @@ nfs_unmount(mp, mntflags, p)
         */
        error = vflush(mp, vp, SKIPSWAP | flags);
        if (mntflags & MNT_FORCE) 
-               error = vflush(mp, NULLVP, flags);
+               error = vflush(mp, NULLVP, flags); /* locks vp in the process */
        else {
                if (vp->v_usecount > 1) {
-                       VOP_UNLOCK(vp, 0, p);
                        nmp->nm_flag &= ~NFSMNT_DISMINPROG;
                        return (EBUSY);
                }
@@ -979,7 +964,6 @@ nfs_unmount(mp, mntflags, p)
        }
 
        if (error) {
-               VOP_UNLOCK(vp, 0, p);
                nmp->nm_flag &= ~NFSMNT_DISMINPROG;
                return (error);
        }
@@ -993,10 +977,11 @@ nfs_unmount(mp, mntflags, p)
 
        /*
         * Release the root vnode reference held by mountnfs()
-        * Note: vflush would have done the vgone for us if we
-        * didn't skip over it due to mount reference held.
+        * vflush did the vgone for us when we didn't skip over
+        * it in the MNT_FORCE case. (Thus vp can't be locked when
+        * it in the MNT_FORCE case. (Thus vp can't be locked when we
+        * call vflush in the non-skip case.)
-       vput(vp);
+       vrele(vp);
        if (!(mntflags & MNT_FORCE))
                vgone(vp);
        mp->mnt_data = 0; /* don't want to end up using stale vp */
@@ -1076,6 +1061,7 @@ loop:
        for (vp = mp->mnt_vnodelist.lh_first;
             vp != NULL;
             vp = vp->v_mntvnodes.le_next) {
+                int didhold = 0;
                /*
                 * If the vnode that we are about to sync is no longer
                 * associated with this mount point, start over.
@@ -1086,10 +1072,14 @@ loop:
                        continue;
                if (vget(vp, LK_EXCLUSIVE, p))
                        goto loop;
+               didhold = ubc_hold(vp);
                error = VOP_FSYNC(vp, cred, waitfor, p);
                if (error)
                        allerror = error;
-               vput(vp);
+               VOP_UNLOCK(vp, 0, p);
+               if (didhold)
+                       ubc_rele(vp);
+               vrele(vp);
        }
        return (allerror);
 }
index a34b11003181df6de395b3de42851722aa0fea77..8b64b7d245d393da577cf9dbaada02407b9deb41 100644 (file)
@@ -63,7 +63,6 @@
 /*
  * vnode op calls for Sun NFS version 2 and 3
  */
-
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 
 #include <sys/kdebug.h>
 
+#define FSDBG(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_TOP(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+#define FSDBG_BOT(A, B, C, D, E) \
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \
+               (int)(B), (int)(C), (int)(D), (int)(E), 0)
+
 #define        TRUE    1
 #define        FALSE   0
 
@@ -523,21 +532,22 @@ nfs3_access_otw(struct vnode *vp,
        register caddr_t cp;
        u_int32_t rmode;
        struct nfsnode *np = VTONFS(vp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_ACCESS]++;   
        nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
        nfsm_fhtom(vp, v3);
        nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(wmode);
-       nfsm_request(vp, NFSPROC_ACCESS, p, cred);
-       nfsm_postop_attr(vp, attrflag);
+       nfsm_request(vp, NFSPROC_ACCESS, p, cred, &xid);
+       nfsm_postop_attr(vp, attrflag, &xid);
        if (!error) {
                nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
                rmode = fxdr_unsigned(u_int32_t, *tl);
                np->n_mode = rmode;
                np->n_modeuid = cred->cr_uid;
                np->n_modestamp = time_second;
-               }
+       }
        nfsm_reqdone;
        return error;
 }
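
nfs3_access_otw() records the rights mask returned by the server together with the requesting credential's uid and a timestamp. nfs_access() (below) answers later permission checks from this per-node cache while it is still fresh; in isolation, the cache-hit test it applies to these fields is:

	/* fresh, same uid, and cached rights cover every requested bit */
	if (time_second < np->n_modestamp + nfsaccess_cache_timeout &&
	    cred->cr_uid == np->n_modeuid &&
	    (np->n_mode & mode) == mode) {
		/* cache hit: no ACCESS RPC needed */
	}
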
@@ -578,21 +588,21 @@ nfs_access(ap)
                        mode = 0;
                if (vp->v_type == VDIR) {
                        if (ap->a_mode & VWRITE)
-                               mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
-                                        NFSV3ACCESS_DELETE);
+                               mode |= NFSV3ACCESS_MODIFY |
+                                       NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE;
                        if (ap->a_mode & VEXEC)
                                mode |= NFSV3ACCESS_LOOKUP;
                } else {
                        if (ap->a_mode & VWRITE)
-                               mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
+                               mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND;
                        if (ap->a_mode & VEXEC)
                                mode |= NFSV3ACCESS_EXECUTE;
                }
                /* XXX safety belt, only make blanket request if caching */
                if (nfsaccess_cache_timeout > 0) {
                        wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
-                                       NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
-                                       NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+                               NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
+                               NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
                } else
                        wmode = mode;
                 
@@ -600,9 +610,9 @@ nfs_access(ap)
                 * Does our cached result allow us to give a definite yes to
                 * this request?
                 */     
-               if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
-                       (ap->a_cred->cr_uid == np->n_modeuid) &&
-                       ((np->n_mode & mode) == mode)) {
+               if (time_second < np->n_modestamp + nfsaccess_cache_timeout &&
+                   ap->a_cred->cr_uid == np->n_modeuid &&
+                   (np->n_mode & mode) == mode) {
                        /* nfsstats.accesscache_hits++; */
                } else {
                        /*
@@ -613,8 +623,8 @@ nfs_access(ap)
                        if (!error) {
                                if ((np->n_mode & mode) != mode)
                                        error = EACCES;
-                                       }
-                               }
+                       }
+               }
        } else
                return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */
        /*
@@ -631,8 +641,8 @@ nfs_access(ap)
                                error = EROFS;
                        default: 
                                break;
-                       }
                }
+       }
        return (error);
 }
 
@@ -644,6 +654,7 @@ nfs_access(ap)
  * if consistency is lost.
  */
 /* ARGSUSED */
+
 static int
 nfs_open(ap)
        struct vop_open_args /* {
@@ -659,10 +670,9 @@ nfs_open(ap)
        struct vattr vattr;
        int error;
 
-       if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
-{ printf("open eacces vtyp=%d\n",vp->v_type);
+       if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
                return (EACCES);
-}
+       }
        /*
         * Get a valid lease. If cached data is stale, flush it.
         */
@@ -769,8 +779,14 @@ nfs_close(ap)
            if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
                (np->n_flag & NMODIFIED)) {
                if (NFS_ISV3(vp)) {
-                   error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0);
-                   np->n_flag &= ~NMODIFIED;
+                   error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 1);
+                   /*
+                    * We cannot clear the NMODIFIED bit in np->n_flag due to
+                    * potential races with other processes (and because
+                    * the commit arg is 0 in the nfs_flush call above).
+                    * NMODIFIED is only a hint.
+                    */
+                   /* np->n_flag &= ~NMODIFIED; */
                } else
                    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
                np->n_attrstamp = 0;
@@ -804,65 +820,72 @@ nfs_getattr(ap)
        int error = 0;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
+       int avoidfloods;
        
+       FSDBG_TOP(513, np->n_size, np, np->n_vattr.va_size, np->n_flag);
        /*
         * Update local times for special files.
         */
        if (np->n_flag & (NACC | NUPD))
                np->n_flag |= NCHG;
-
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_START,
-                    (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
-
        /*
         * First look in the cache.
         */
        if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) {
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
-                            (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
-
+               FSDBG_BOT(513, np->n_size, 0, np->n_vattr.va_size, np->n_flag);
                return (0);
        }
-       if (error != ENOENT)
+       if (error != ENOENT) {
+               FSDBG_BOT(513, np->n_size, error, np->n_vattr.va_size,
+                         np->n_flag);
                return (error);
+       }
        error = 0;
-        
+
        if (v3 && nfsaccess_cache_timeout > 0) {
                /*  nfsstats.accesscache_misses++; */
-               if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred))
-                    return (error);
+               if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p,
+                                           ap->a_cred))
+                       return (error);
                if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0)
                        return (0);
                if (error != ENOENT)
                        return (error);
                error = 0;
        }
-
+       avoidfloods = 0;
+tryagain:
        nfsstats.rpccnt[NFSPROC_GETATTR]++;
        nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
        nfsm_fhtom(vp, v3);
-       nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+       nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred, &xid);
        if (!error) {
-               nfsm_loadattr(vp, ap->a_vap);
+               nfsm_loadattr(vp, ap->a_vap, &xid);
+               if (!xid) { /* out-of-order rpc - attributes were dropped */
+                       m_freem(mrep);
+                       FSDBG(513, -1, np, np->n_xid << 32, np->n_xid);
+                       if (avoidfloods++ < 100)
+                               goto tryagain;
+                       /*
+                        * avoidfloods > 1 is bizarre; at 100, pull the plug
+                        */
+                       panic("nfs_getattr: getattr flood\n");
+               }
                if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) {
-                       NFSTRACE(NFSTRC_GA_INV, vp);
+                       FSDBG(513, -1, np, -1, vp);
                        if (vp->v_type == VDIR)
                                nfs_invaldir(vp);
                        error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
                                              ap->a_p, 1);
-                       if (!error) {
-                               NFSTRACE(NFSTRC_GA_INV1, vp);
+                       FSDBG(513, -1, np, -2, error);
+                       if (!error)
                                np->n_mtime = ap->a_vap->va_mtime.tv_sec;
-                       } else {
-                               NFSTRACE(NFSTRC_GA_INV2, error);
-                       }
                }
        }
        nfsm_reqdone;
 
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
-                    (int)np->n_size, -1, (int)np->n_vattr.va_size, error, 0);
-
+       FSDBG_BOT(513, np->n_size, -1, np->n_vattr.va_size, error);
        return (error);
 }
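
When nfsm_loadattr() zeroes the xid to signal that an out-of-order reply's attributes were dropped, nfs_getattr() simply reissues the GETATTR, with avoidfloods capping the retries at 100 before pulling the plug. The condensed shape of that retry (fragment for illustration; it relies on the surrounding nfsm_* macro framework):

	avoidfloods = 0;
tryagain:
	/* ... nfsm_reqhead()/nfsm_fhtom()/nfsm_request() build and send ... */
	nfsm_loadattr(vp, ap->a_vap, &xid);
	if (!xid) {		/* reply older than attrs already applied */
		m_freem(mrep);
		if (avoidfloods++ < 100)
			goto tryagain;	/* harmless: just ask again */
		panic("nfs_getattr: getattr flood\n");
	}
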
 
@@ -888,6 +911,15 @@ nfs_setattr(ap)
 #ifndef nolint
        tsize = (u_quad_t)0;
 #endif
+
+#ifdef XXX /* enable this code soon! (but test it first) */
+       /*                
+        * Setting of flags is not supported.
+        */
+       if (vap->va_flags != VNOVAL)
+               return (EOPNOTSUPP);
+#endif
+
        /*
         * Disallow write attempts if the filesystem is mounted read-only.
         */
@@ -919,72 +951,63 @@ nfs_setattr(ap)
                         */
                        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                                return (EROFS);
-                       np->n_flag |= NMODIFIED;
-                       tsize = np->n_size;
-                       
-                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_START,
-                                    (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, np->n_flag, 0);
-
-                       if (vap->va_size == 0)
-                               error = nfs_vinvalbuf(vp, 0,
-                                       ap->a_cred, ap->a_p, 1);
-                       else
-                               error = nfs_vinvalbuf(vp, V_SAVE,
-                                       ap->a_cred, ap->a_p, 1);
-
-                       if (UBCISVALID(vp))
-                               ubc_setsize(vp, (off_t)vap->va_size); /* XXX check error */
-
-                       if (error) {
-                               printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
-
-#if DIAGNOSTIC
-                               kprintf("nfs_setattr: nfs_vinvalbuf %d\n",
-                                       error);
-#endif /* DIAGNOSTIC */
-                               if (UBCISVALID(vp))
-                                       ubc_setsize(vp, (off_t)tsize); /* XXX check error */
-
-                               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
-                                            (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, -1, 0);
-
-                               return (error);
+                       FSDBG_TOP(512, np->n_size, vap->va_size,
+                                 np->n_vattr.va_size, np->n_flag);
+                       if (np->n_flag & NMODIFIED) {
+                               if (vap->va_size == 0)
+                                       error = nfs_vinvalbuf(vp, 0,
+                                               ap->a_cred, ap->a_p, 1);
+                               else
+                                       error = nfs_vinvalbuf(vp, V_SAVE,
+                                               ap->a_cred, ap->a_p, 1);
+                               if (error) {
+                                       printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
+                                       FSDBG_BOT(512, np->n_size, vap->va_size,
+                                                 np->n_vattr.va_size, -1);
+                                       return (error);
+                               }
+                       } else if (np->n_size > vap->va_size) { /* shrinking? */
+                               daddr_t obn, bn;
+                               int biosize;
+                               struct buf *bp;
+
+                               biosize = min(vp->v_mount->mnt_stat.f_iosize,
+                                             PAGE_SIZE);
+                               obn = (np->n_size - 1) / biosize;
+                               bn = vap->va_size / biosize; 
+                               for ( ; obn >= bn; obn--)
+                                       if (incore(vp, obn)) {
+                                               bp = getblk(vp, obn, biosize, 0,
+                                                           0, BLK_READ);
+                                               FSDBG(512, bp, bp->b_flags,
+                                                     0, obn);
+                                               SET(bp->b_flags, B_INVAL);
+                                               brelse(bp);
+                                       }
                        }
+                       tsize = np->n_size;
                        np->n_size = np->n_vattr.va_size = vap->va_size;
-
+                       ubc_setsize(vp, (off_t)vap->va_size); /* XXX */
                };
        } else if ((vap->va_mtime.tv_sec != VNOVAL ||
-                   vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
-                  vp->v_type == VREG &&
+                   vap->va_atime.tv_sec != VNOVAL) &&
+                  (np->n_flag & NMODIFIED) && vp->v_type == VREG &&
                   (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
                                          ap->a_p, 1)) == EINTR)
-               return (error);
-
+               return (error);
        error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
-
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
-                    (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, error, 0);
-
+       FSDBG_BOT(512, np->n_size, vap->va_size, np->n_vattr.va_size, error);
        if (error && vap->va_size != VNOVAL) {
                /* make every effort to resync file size w/ server... */
                int err = 0; /* preserve "error" for return */
 
                printf("nfs_setattr: nfs_setattrrpc %d\n", error);
-#if DIAGNOSTIC
-               kprintf("nfs_setattr: nfs_setattrrpc %d\n", error);
-#endif /* DIAGNOSTIC */
                np->n_size = np->n_vattr.va_size = tsize;
-               if (UBCISVALID(vp))
-                       ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+               ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
                vap->va_size = tsize;
                err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
-
                if (err)
                        printf("nfs_setattr1: nfs_setattrrpc %d\n", err);
-#if DIAGNOSTIC
-               if (err)
-                       kprintf("nfs_setattr nfs_setattrrpc %d\n", err);
-#endif /* DIAGNOSTIC */
        }
        return (error);
 }
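
For a clean file being truncated, the new shrink path above avoids flushing: it walks the block range from the old EOF back to the block containing the new size, marks any resident buffers invalid, and then updates both the nfsnode and UBC sizes. Condensed (illustration only, same variables as the function):

	biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
	obn = (np->n_size - 1) / biosize;	/* last block of the old size */
	bn = vap->va_size / biosize;		/* block holding the new EOF */
	for (; obn >= bn; obn--)
		if (incore(vp, obn)) {		/* invalidate resident blocks */
			bp = getblk(vp, obn, biosize, 0, 0, BLK_READ);
			SET(bp->b_flags, B_INVAL);
			brelse(bp);
		}
	np->n_size = np->n_vattr.va_size = vap->va_size;
	ubc_setsize(vp, (off_t)vap->va_size);
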
@@ -1007,6 +1030,7 @@ nfs_setattrrpc(vp, vap, cred, procp)
        int error = 0, wccflag = NFSV3_WCCRATTR;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_SETATTR]++;
        nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
@@ -1090,13 +1114,13 @@ nfs_setattrrpc(vp, vap, cred, procp)
                txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
                txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
        }
-       nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
+       nfsm_request(vp, NFSPROC_SETATTR, procp, cred, &xid);
        if (v3) {
-               nfsm_wcc_data(vp, wccflag);
-               if ((!wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
-                       VTONFS(vp)->n_attrstamp = 0;
+               nfsm_wcc_data(vp, wccflag, &xid);
+               if (!wccflag && vp->v_type != VBAD) /* EINVAL on VBAD node */
+                       VTONFS(vp)->n_attrstamp = 0;
        } else
-               nfsm_loadattr(vp, (struct vattr *)0);
+               nfsm_loadattr(vp, (struct vattr *)0, &xid);
        nfsm_reqdone;
        return (error);
 }
@@ -1133,6 +1157,7 @@ nfs_lookup(ap)
        int v3 = NFS_ISV3(dvp);
        struct proc *p = cnp->cn_proc;
        int worldbuildworkaround = 1;
+       u_int64_t xid;
 
        if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
            (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
@@ -1144,7 +1169,7 @@ nfs_lookup(ap)
        wantparent = flags & (LOCKPARENT|WANTPARENT);
        nmp = VFSTONFS(dvp->v_mount);
        np = VTONFS(dvp);
-        
+
        if (worldbuildworkaround) {
                /* 
                 * Temporary workaround for world builds to not have dvp go
@@ -1169,7 +1194,7 @@ nfs_lookup(ap)
                        *vpp = NULLVP;
                        goto error_return;
                        }
-                
+
                /* got to check to make sure the vnode didn't go away if access went to server */
                if ((*vpp)->v_type == VBAD) {
                        error = EINVAL;
@@ -1216,7 +1241,7 @@ nfs_lookup(ap)
                if (error) 
                        goto error_return;
        }
-        
+
        /* 
         * Got to check to make sure the vnode didn't go away if VOP_GETATTR went to server
         * or callers prior to this blocked and had it go VBAD.
@@ -1236,10 +1261,10 @@ nfs_lookup(ap)
        nfsm_fhtom(dvp, v3);
        nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
        /* nfsm_request for NFSv2 causes you to goto to nfsmout upon errors */
-       nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); 
+       nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred, &xid); 
 
        if (error) {
-               nfsm_postop_attr(dvp, attrflag);
+               nfsm_postop_attr(dvp, attrflag, &xid);
                m_freem(mrep);
                goto nfsmout;
        }
@@ -1260,10 +1285,12 @@ nfs_lookup(ap)
                }
                newvp = NFSTOV(np);
                if (v3) {
-                       nfsm_postop_attr(newvp, attrflag);
-                       nfsm_postop_attr(dvp, attrflag);
+                       u_int64_t dxid = xid;
+
+                       nfsm_postop_attr(newvp, attrflag, &xid);
+                       nfsm_postop_attr(dvp, attrflag, &dxid);
                } else
-                       nfsm_loadattr(newvp, (struct vattr *)0);
+                       nfsm_loadattr(newvp, (struct vattr *)0, &xid);
                *vpp = newvp;
                m_freem(mrep);
                cnp->cn_flags |= SAVENAME;
@@ -1299,10 +1326,12 @@ nfs_lookup(ap)
                newvp = NFSTOV(np);
        }
        if (v3) {
-               nfsm_postop_attr(newvp, attrflag);
-               nfsm_postop_attr(dvp, attrflag);
+               u_int64_t dxid = xid;
+
+               nfsm_postop_attr(newvp, attrflag, &xid);
+               nfsm_postop_attr(dvp, attrflag, &dxid);
        } else
-               nfsm_loadattr(newvp, (struct vattr *)0);
+               nfsm_loadattr(newvp, (struct vattr *)0, &xid);
        if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
                cnp->cn_flags |= SAVENAME;
        if ((cnp->cn_flags & MAKEENTRY) &&
@@ -1364,6 +1393,7 @@ nfs_read(ap)
        return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0));
 }
 
+
 /*
  * nfs readlink call
  */
@@ -1399,17 +1429,18 @@ nfs_readlinkrpc(vp, uiop, cred)
        int error = 0, len, attrflag;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_READLINK]++;
        nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
        nfsm_fhtom(vp, v3);
-       nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+       nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred, &xid);
        if (v3)
-               nfsm_postop_attr(vp, attrflag);
+               nfsm_postop_attr(vp, attrflag, &xid);
        if (!error) {
                nfsm_strsiz(len, NFS_MAXPATHLEN);
-               if (len == NFS_MAXPATHLEN) {
-                        struct nfsnode *np = VTONFS(vp);
+               if (len == NFS_MAXPATHLEN) {
+                       struct nfsnode *np = VTONFS(vp);
 #if DIAGNOSTIC
                         if (!np)
                                 panic("nfs_readlinkrpc: null np");
@@ -1441,13 +1472,15 @@ nfs_readrpc(vp, uiop, cred)
        struct nfsmount *nmp;
        int error = 0, len, retlen, tsiz, eof, attrflag;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
 
 #ifndef nolint
        eof = 0;
 #endif
        nmp = VFSTONFS(vp->v_mount);
        tsiz = uiop->uio_resid;
-        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
+        if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) &&
+            !v3)
                return (EFBIG);
        while (tsiz > 0) {
                nfsstats.rpccnt[NFSPROC_READ]++;
@@ -1463,9 +1496,9 @@ nfs_readrpc(vp, uiop, cred)
                        *tl++ = txdr_unsigned(len);
                        *tl = 0;
                }
-               nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+               nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred, &xid);
                if (v3) {
-                       nfsm_postop_attr(vp, attrflag);
+                       nfsm_postop_attr(vp, attrflag, &xid);
                        if (error) {
                                m_freem(mrep);
                                goto nfsmout;
@@ -1473,7 +1506,7 @@ nfs_readrpc(vp, uiop, cred)
                        nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
                        eof = fxdr_unsigned(int, *(tl + 1));
                } else
-                       nfsm_loadattr(vp, (struct vattr *)0);
+                       nfsm_loadattr(vp, (struct vattr *)0, &xid);
                nfsm_strsiz(retlen, nmp->nm_rsize);
                nfsm_mtouio(uiop, retlen);
                m_freem(mrep);
@@ -1506,6 +1539,7 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
        int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
+       u_int64_t xid;
 
 #if DIAGNOSTIC
        if (uiop->uio_iovcnt != 1)
@@ -1534,10 +1568,10 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
                }
                *tl = txdr_unsigned(len);
                nfsm_uiotom(uiop, len);
-               nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+               nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred, &xid);
                if (v3) {
                        wccflag = NFSV3_WCCCHK;
-                       nfsm_wcc_data(vp, wccflag);
+                       nfsm_wcc_data(vp, wccflag, &xid);
                        if (!error) {
                                nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED +
                                        NFSX_V3WRITEVERF);
@@ -1576,23 +1610,29 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit)
                                }
                        }
                } else
-                   nfsm_loadattr(vp, (struct vattr *)0);
-               if ((wccflag) && (vp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+                   nfsm_loadattr(vp, (struct vattr *)0, &xid);
+
+               if (wccflag && vp->v_type != VBAD) /* EINVAL set on VBAD node */
                    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
                m_freem(mrep);
-                /*
-                 * we seem to have a case where we end up looping on shutdown and taking down nfs servers.
-                 * For V3, error cases, there is no way to terminate loop, if the len was 0, meaning,
-                 * nmp->nm_wsize was trashed. FreeBSD has this fix in it. Let's try it.
-                 */
-                if (error)
-                    break;
-                tsiz -= len;
+               /*
+                * We seem to have a case where we end up looping on shutdown
+                * and taking down nfs servers.  For V3 error cases there is
+                * no way to terminate the loop if len was 0, meaning
+                * nmp->nm_wsize was trashed.  FreeBSD has this fix in it;
+                * let's try it.
+                */
+               if (error)
+                       break;
+               tsiz -= len;
        }
 nfsmout:
-        /* does it make sense to even say it was committed if we had an error? EKN */
-        /* okay well just don't on bad vnodes then. EINVAL will be returned on bad vnodes */
-        if ((vp->v_type != VBAD) && (vp->v_mount->mnt_flag & MNT_ASYNC))
+        /* EKN
+         * Does it make sense to even say it was committed if we had an
+         * error?  Okay, just don't claim it on bad vnodes then; EINVAL
+         * will be returned on bad vnodes.
+         */
+        if (vp->v_type != VBAD && (vp->v_mount->mnt_flag & MNT_ASYNC))
                committed = NFSV3WRITE_FILESYNC;
         *iomode = committed;
        if (error)
@@ -1625,6 +1665,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
        int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        u_long rdev;
+       u_int64_t xid;
        int v3 = NFS_ISV3(dvp);
 
        if (vap->va_type == VCHR || vap->va_type == VBLK)
@@ -1665,9 +1706,9 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
                txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
                txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
        }
-       nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred, &xid);
        if (!error) {
-               nfsm_mtofh(dvp, newvp, v3, gotvp);
+               nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
                if (!gotvp) {
                        if (newvp) {
                                vput(newvp);
@@ -1680,7 +1721,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
                }
        }
        if (v3)
-               nfsm_wcc_data(dvp, wccflag);
+               nfsm_wcc_data(dvp, wccflag, &xid);
        nfsm_reqdone;
        if (error) {
                if (newvp)
@@ -1691,11 +1732,11 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
                *vpp = newvp;
        }
        FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
-               VTONFS(dvp)->n_attrstamp = 0;
-            }
+       if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
+               VTONFS(dvp)->n_flag |= NMODIFIED;
+               if (!wccflag)
+                       VTONFS(dvp)->n_attrstamp = 0;
+       }
        vput(dvp);
        return (error);
 }
@@ -1718,8 +1759,9 @@ nfs_mknod(ap)
        int error;
 
        error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
-       if (!error)
+       if (!error && newvp)
                vput(newvp);
+       *ap->a_vpp = 0;
        return (error);
 }
 
@@ -1751,6 +1793,7 @@ nfs_create(ap)
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        struct vattr vattr;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid;
 
        /*
         * Oops, not for me..
@@ -1796,9 +1839,9 @@ again:
                txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
                txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
        }
-       nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred, &xid);
        if (!error) {
-               nfsm_mtofh(dvp, newvp, v3, gotvp);
+               nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
                if (!gotvp) {
                        if (newvp) {
                                vput(newvp);
@@ -1811,7 +1854,7 @@ again:
                }
        }
        if (v3)
-               nfsm_wcc_data(dvp, wccflag);
+               nfsm_wcc_data(dvp, wccflag, &xid);
        nfsm_reqdone;
        if (error) {
                if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
@@ -1889,7 +1932,8 @@ nfs_remove(ap)
                 * unnecessary delayed writes later.
                 */
                error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
-               ubc_setsize(vp, (off_t)0);
+               np->n_size = 0;
+               ubc_setsize(vp, (off_t)0); /* XXX check error */
                /* Do the rpc */
                if (error != EINTR)
                        error = nfs_removerpc(dvp, cnp->cn_nameptr,
@@ -1951,21 +1995,22 @@ nfs_removerpc(dvp, name, namelen, cred, proc)
        int error = 0, wccflag = NFSV3_WCCRATTR;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_REMOVE]++;
        nfsm_reqhead(dvp, NFSPROC_REMOVE,
                NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
        nfsm_fhtom(dvp, v3);
        nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
-       nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
+       nfsm_request(dvp, NFSPROC_REMOVE, proc, cred, &xid);
        if (v3)
-               nfsm_wcc_data(dvp, wccflag);
+               nfsm_wcc_data(dvp, wccflag, &xid);
        nfsm_reqdone;
        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
                VTONFS(dvp)->n_flag |= NMODIFIED;
                if (!wccflag)
                        VTONFS(dvp)->n_attrstamp = 0;
-               }
+       }
        return (error);
 }
 
@@ -2075,30 +2120,33 @@ nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
        int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(fdvp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_RENAME]++;
        nfsm_reqhead(fdvp, NFSPROC_RENAME,
-               (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
-               nfsm_rndup(tnamelen));
+                    (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
+                     nfsm_rndup(tnamelen));
        nfsm_fhtom(fdvp, v3);
        nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
        nfsm_fhtom(tdvp, v3);
        nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
-       nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
+       nfsm_request(fdvp, NFSPROC_RENAME, proc, cred, &xid);
        if (v3) {
-               nfsm_wcc_data(fdvp, fwccflag);
-               nfsm_wcc_data(tdvp, twccflag);
+               u_int64_t txid = xid;
+
+               nfsm_wcc_data(fdvp, fwccflag, &xid);
+               nfsm_wcc_data(tdvp, twccflag, &txid);
        }
        nfsm_reqdone;
-        if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(fdvp)->n_flag |= NMODIFIED;
-            if (!fwccflag)
-               VTONFS(fdvp)->n_attrstamp = 0;
-        }
-        if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(tdvp)->n_flag |= NMODIFIED;
-            if (!twccflag)
-                    VTONFS(tdvp)->n_attrstamp = 0;
+       if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
+               VTONFS(fdvp)->n_flag |= NMODIFIED;
+               if (!fwccflag)
+                       VTONFS(fdvp)->n_attrstamp = 0;
+       }
+       if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
+               VTONFS(tdvp)->n_flag |= NMODIFIED;
+               if (!twccflag)
+                       VTONFS(tdvp)->n_attrstamp = 0;
         }
        return (error);
 }
@@ -2124,6 +2172,7 @@ nfs_link(ap)
        int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
 
        if (vp->v_mount != tdvp->v_mount) {
                VOP_ABORTOP(vp, cnp);
@@ -2147,18 +2196,20 @@ nfs_link(ap)
        nfsm_fhtom(vp, v3);
        nfsm_fhtom(tdvp, v3);
        nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
-       nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred, &xid);
        if (v3) {
-               nfsm_postop_attr(vp, attrflag);
-               nfsm_wcc_data(tdvp, wccflag);
+               u_int64_t txid = xid;
+
+               nfsm_postop_attr(vp, attrflag, &xid);
+               nfsm_wcc_data(tdvp, wccflag, &txid);
        }
        nfsm_reqdone;
        FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
 
        VTONFS(tdvp)->n_flag |= NMODIFIED;
-       if ((!attrflag) && (vp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+       if (!attrflag && vp->v_type != VBAD)  /* EINVAL set on VBAD vnode */
                VTONFS(vp)->n_attrstamp = 0;
-       if ((!wccflag) && (tdvp->v_type != VBAD))  /* EINVAL set on VBAD vnode */
+       if (!wccflag && tdvp->v_type != VBAD)  /* EINVAL set on VBAD vnode */
                VTONFS(tdvp)->n_attrstamp = 0;
        vput(tdvp);
        /*
@@ -2195,6 +2246,7 @@ nfs_symlink(ap)
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        struct vnode *newvp = (struct vnode *)0;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_SYMLINK]++;
        slen = strlen(ap->a_target);
@@ -2217,21 +2269,23 @@ nfs_symlink(ap)
                txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
                txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
        }
-       nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred, &xid);
        if (v3) {
+               u_int64_t dxid = xid;
+
                if (!error)
-                       nfsm_mtofh(dvp, newvp, v3, gotvp);
-               nfsm_wcc_data(dvp, wccflag);
+                       nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
+               nfsm_wcc_data(dvp, wccflag, &dxid);
        }
        nfsm_reqdone;
        if (newvp)
                vput(newvp);
        FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
-               VTONFS(dvp)->n_attrstamp = 0;
-        }
+       if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
+               VTONFS(dvp)->n_flag |= NMODIFIED;
+               if (!wccflag)
+                       VTONFS(dvp)->n_attrstamp = 0;
+       }
        vput(dvp);
        /*
         * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
@@ -2270,6 +2324,7 @@ nfs_mkdir(ap)
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        struct vattr vattr;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid, dxid;
 
        if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
                VOP_ABORTOP(dvp, cnp);
@@ -2294,17 +2349,18 @@ nfs_mkdir(ap)
                txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
                txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
        }
-       nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred, &xid);
+       dxid = xid;
        if (!error)
-               nfsm_mtofh(dvp, newvp, v3, gotvp);
+               nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
        if (v3)
-               nfsm_wcc_data(dvp, wccflag);
+               nfsm_wcc_data(dvp, wccflag, &dxid);
        nfsm_reqdone;
        if (dvp->v_type != VBAD) { /* EINVAL set on this case */
                VTONFS(dvp)->n_flag |= NMODIFIED;
                if (!wccflag)
                        VTONFS(dvp)->n_attrstamp = 0;
-               }
+       }
        /*
         * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
         * if we can succeed in looking up the directory.
@@ -2315,7 +2371,7 @@ nfs_mkdir(ap)
                        newvp = (struct vnode *)0;
                }
                error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
-                       cnp->cn_proc, &np);
+                                    cnp->cn_proc, &np);
                if (!error) {
                        newvp = NFSTOV(np);
                        if (newvp->v_type != VDIR)
@@ -2353,22 +2409,23 @@ nfs_rmdir(ap)
        int error = 0, wccflag = NFSV3_WCCRATTR;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_RMDIR]++;
        nfsm_reqhead(dvp, NFSPROC_RMDIR,
                NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
        nfsm_fhtom(dvp, v3);
        nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
-       nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+       nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred, &xid);
        if (v3)
-               nfsm_wcc_data(dvp, wccflag);
+               nfsm_wcc_data(dvp, wccflag, &xid);
        nfsm_reqdone;
        FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-        if (dvp->v_type != VBAD) { /* EINVAL set on this case */
-            VTONFS(dvp)->n_flag |= NMODIFIED;
-            if (!wccflag)
-               VTONFS(dvp)->n_attrstamp = 0;
-        }
+       if (dvp->v_type != VBAD) { /* EINVAL set on this case */
+               VTONFS(dvp)->n_flag |= NMODIFIED;
+               if (!wccflag)
+                       VTONFS(dvp)->n_attrstamp = 0;
+       }
        cache_purge(dvp);
        cache_purge(vp);
        vput(vp);
@@ -2454,6 +2511,7 @@ nfs_readdirrpc(vp, uiop, cred)
        int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
        int attrflag;
        int v3 = NFS_ISV3(vp);
+       u_int64_t xid;
 
 #ifndef nolint
        dp = (struct dirent *)0;
@@ -2493,9 +2551,9 @@ nfs_readdirrpc(vp, uiop, cred)
                        *tl++ = cookie.nfsuquad[0];
                }
                *tl = txdr_unsigned(nmp->nm_readdirsize);
-               nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+               nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred, &xid);
                if (v3) {
-                       nfsm_postop_attr(vp, attrflag);
+                       nfsm_postop_attr(vp, attrflag, &xid);
                        if (!error) {
                                nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
                                dnp->n_cookieverf.nfsuquad[0] = *tl++;
@@ -2641,6 +2699,7 @@ nfs_readdirplusrpc(vp, uiop, cred)
        u_quad_t fileno;
        int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
        int attrflag, fhsize;
+       u_int64_t xid, savexid;
 
 #ifndef nolint
        dp = (struct dirent *)0;
@@ -2678,8 +2737,10 @@ nfs_readdirplusrpc(vp, uiop, cred)
                *tl++ = dnp->n_cookieverf.nfsuquad[1];
                *tl++ = txdr_unsigned(nmp->nm_readdirsize);
                *tl = txdr_unsigned(nmp->nm_rsize);
-               nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
-               nfsm_postop_attr(vp, attrflag);
+               nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred,
+                            &xid);
+               savexid = xid;
+               nfsm_postop_attr(vp, attrflag, &xid);
                if (error) {
                        m_freem(mrep);
                        goto nfsmout;
@@ -2776,7 +2837,8 @@ nfs_readdirplusrpc(vp, uiop, cred)
                                dpos = dpossav1;
                                mdsav2 = md;
                                md = mdsav1;
-                               nfsm_loadattr(newvp, (struct vattr *)0);
+                               xid = savexid;
+                               nfsm_loadattr(newvp, (struct vattr *)0, &xid);
                                dpos = dpossav2;
                                md = mdsav2;
                                dp->d_type =
@@ -2937,13 +2999,14 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
        nfsfh_t *nfhp;
        int v3 = NFS_ISV3(dvp);
+       u_int64_t xid;
 
        nfsstats.rpccnt[NFSPROC_LOOKUP]++;
        nfsm_reqhead(dvp, NFSPROC_LOOKUP,
                NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
        nfsm_fhtom(dvp, v3);
        nfsm_strtom(name, len, NFS_MAXNAMLEN);
-       nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
+       nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred, &xid);
        if (npp && !error) {
                nfsm_getfh(nfhp, fhlen, v3);
                if (*npp) {
@@ -2970,7 +3033,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
                    newvp = NFSTOV(np);
                }
                if (v3) {
-                       nfsm_postop_attr(newvp, attrflag);
+                       nfsm_postop_attr(newvp, attrflag, &xid);
                        if (!attrflag && *npp == NULL) {
                                m_freem(mrep);
                                if (newvp == dvp)
@@ -2980,7 +3043,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
                                return (ENOENT);
                        }
                } else
-                       nfsm_loadattr(newvp, (struct vattr *)0);
+                       nfsm_loadattr(newvp, (struct vattr *)0, &xid);
        }
        nfsm_reqdone;
        if (npp && *npp == NULL) {
@@ -3014,7 +3077,9 @@ nfs_commit(vp, offset, cnt, cred, procp)
        caddr_t bpos, dpos, cp2;
        int error = 0, wccflag = NFSV3_WCCRATTR;
        struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+       u_int64_t xid;
        
+       FSDBG(521, vp, offset, cnt, nmp->nm_flag);
        if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
                return (0);
        nfsstats.rpccnt[NFSPROC_COMMIT]++;
@@ -3024,12 +3089,12 @@ nfs_commit(vp, offset, cnt, cred, procp)
        txdr_hyper(&offset, tl);
        tl += 2;
        *tl = txdr_unsigned(cnt);
-       nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
-       nfsm_wcc_data(vp, wccflag);
+       nfsm_request(vp, NFSPROC_COMMIT, procp, cred, &xid);
+       nfsm_wcc_data(vp, wccflag, &xid);
        if (!error) {
                nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF);
                if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
-                       NFSX_V3WRITEVERF)) {
+                        NFSX_V3WRITEVERF)) {
                        bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
                                NFSX_V3WRITEVERF);
                        error = NFSERR_STALEWRITEVERF;
@@ -3144,7 +3209,6 @@ nfs_fsync(ap)
                struct proc * a_p;
        } */ *ap;
 {
-
        return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
 }
 
@@ -3171,16 +3235,15 @@ nfs_flush(vp, cred, waitfor, p, commit)
        u_quad_t off, endoff, toff;
        struct ucred* wcred = NULL;
        struct buf **bvec = NULL;
-       kern_return_t kret;
-       upl_t *upls = NULL;
-
-
 #ifndef NFS_COMMITBVECSIZ
 #define NFS_COMMITBVECSIZ      20
 #endif
        struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
-       struct upl_t *upls_on_stack[NFS_COMMITBVECSIZ]; 
-       int bvecsize = 0, bveccount, buplpos;
+       int bvecsize = 0, bveccount;
+       kern_return_t kret;
+       upl_t         upl;
+
+       FSDBG_TOP(517, vp, np, waitfor, commit);
 
        if (nmp->nm_flag & NFSMNT_INT)
                slpflag = PCATCH;
@@ -3195,16 +3258,18 @@ nfs_flush(vp, cred, waitfor, p, commit)
         * job.
         */
 again:
+       FSDBG(518, vp->v_dirtyblkhd.lh_first, np->n_flag, 0, 0);
        if (vp->v_dirtyblkhd.lh_first)
                np->n_flag |= NMODIFIED;
        off = (u_quad_t)-1;
        endoff = 0;
        bvecpos = 0;
-       buplpos = 0;
        if (NFS_ISV3(vp) && commit) {
                s = splbio();
                /*
                 * Count up how many buffers waiting for a commit.
+                * This is an upper bound - any with dirty pages must be
+                * written, not committed.
                 */
                bveccount = 0;
                for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
@@ -3212,45 +3277,79 @@ again:
                        if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
                            == (B_DELWRI | B_NEEDCOMMIT))
                                bveccount++;
+                       FSDBG(519, bp, bp->b_flags, bveccount, 0);
                }
                /*
                 * Allocate space to remember the list of bufs to commit.  It is
-                * important to use M_NOWAIT here to avoid a race with nfs_write.
+                * important to use M_NOWAIT here to avoid a race with nfs_write
                 * If we can't get memory (for whatever reason), we will end up
                 * committing the buffers one-by-one in the loop below.
                 */
                if (bvec != NULL && bvec != bvec_on_stack)
-                               _FREE(bvec, M_TEMP);
-               if (upls != NULL && upls != (upl_t *) upls_on_stack)
-                               _FREE(upls, M_TEMP);
-                               
-               bvecsize = NFS_COMMITBVECSIZ;
+                       _FREE(bvec, M_TEMP);
                if (bveccount > NFS_COMMITBVECSIZ) {
                        MALLOC(bvec, struct buf **,
-                                       bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT);
-                       MALLOC(upls, upl_t *,
-                                       bveccount * sizeof(upl_t), M_TEMP, M_NOWAIT);
-                       if ((bvec == NULL) || (upls == NULL)) {
-                               if (bvec) 
-                                       _FREE(bvec, M_TEMP);
-                               if (upls)
-                                       _FREE(upls, M_TEMP);
+                              bveccount * sizeof(struct buf *), M_TEMP,
+                              M_NOWAIT);
+                       if (bvec == NULL) {
                                bvec = bvec_on_stack;
-                               upls = (upl_t *) upls_on_stack;
+                               bvecsize = NFS_COMMITBVECSIZ;
                        } else
                                bvecsize = bveccount;
                } else {
                        bvec = bvec_on_stack;
-                       upls = (upl_t *) upls_on_stack;
+                       bvecsize = NFS_COMMITBVECSIZ;
                }
+               FSDBG(519, 0, bvecsize, bveccount, 0);
 
                for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
+                       /* XXX nbp aok if we sleep in this loop? */
+                       FSDBG(520, bp, bp->b_flags, bvecpos, bp->b_bufsize);
+                       FSDBG(520, bp->b_validoff, bp->b_validend,
+                             bp->b_dirtyoff, bp->b_dirtyend);
                        if (bvecpos >= bvecsize)
                                break;
                        if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
                                != (B_DELWRI | B_NEEDCOMMIT))
                                continue;
+                       SET(bp->b_flags, B_BUSY);
+                       /*
+                        * we need a upl to see if the page has been
+                        * dirtied (think mmap) since the unstable write, and
+                        * so to prevent vm from paging during our commit rpc
+                        */
+                       if (ISSET(bp->b_flags, B_PAGELIST)) {
+                               upl = bp->b_pagelist;
+                       } else {
+                               kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno),
+                                                     bp->b_bufsize, &upl,
+                                                     NULL, UPL_PRECIOUS);
+                               if (kret != KERN_SUCCESS) 
+                                       panic("nfs_flush: create upl %d", kret);
+#ifdef UBC_DEBUG
+                               upl_ubc_alias_set(upl, current_act(), 1);
+#endif /* UBC_DEBUG */
+                       }
+                       if (upl_dirty_page(ubc_upl_pageinfo(upl), 0)) {
+                               if (!ISSET(bp->b_flags, B_PAGELIST)) {
+                                       err = ubc_upl_abort(upl, NULL); 
+                                       if (err)
+                                               printf("nfs_flush: upl abort %d\n", err);
+                               }
+                               /*
+                                * Any/all of it may be modified...
+                                */
+                               bp->b_dirtyoff = bp->b_validoff;
+                               bp->b_dirtyend = bp->b_validend;
+                               CLR(bp->b_flags, B_BUSY | B_NEEDCOMMIT);
+                               continue;
+                       }
+                       if (!ISSET(bp->b_flags, B_PAGELIST)) {
+                               bp->b_pagelist = upl;
+                               SET(bp->b_flags, B_PAGELIST);
+                               ubc_upl_map(upl, (vm_address_t *)&bp->b_data);
+                       }
                        bremfree(bp);
                        /*
                         * Work out if all buffers are using the same cred
@@ -3260,28 +3359,7 @@ again:
                                wcred = bp->b_wcred;
                        else if (wcred != bp->b_wcred)
                                wcred = NOCRED;
-                       SET(bp->b_flags, (B_BUSY | B_WRITEINPROG));
-
-                       /*
-                        * we need ubc_create_upl so if vm decides to
-                        * do paging while we are waiting on commit rpc,
-                        * that it doesn't pick these pages.
-                        */
-                       if (!ISSET(bp->b_flags, B_PAGELIST)) {
-                               kret = ubc_create_upl(vp,
-                                                               ubc_blktooff(vp, bp->b_lblkno),
-                                                               bp->b_bufsize,
-                                                               &(upls[buplpos]),
-                                                               NULL,     
-                                                               UPL_PRECIOUS);
-                               if (kret != KERN_SUCCESS) 
-                                       panic("nfs_getcacheblk: get pagelists failed with (%d)", kret);
-                                    
-#ifdef UBC_DEBUG
-                               upl_ubc_alias_set(upls[buplpos], ioaddr, 1);
-#endif /* UBC_DEBUG */
-                               buplpos++; /* not same as bvecpos if upl existed already */
-                       }
+                       SET(bp->b_flags, B_WRITEINPROG);
 
                        /*
                         * A list of these buffers is kept so that the
@@ -3316,6 +3394,8 @@ again:
                        for (i = 0; i < bvecpos; i++) {
                                off_t off, size;
                                bp = bvec[i];
+                               FSDBG(522, bp, bp->b_blkno * DEV_BSIZE,
+                                     bp->b_dirtyoff, bp->b_dirtyend);
                                off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
                                        bp->b_dirtyoff;
                                size = (u_quad_t)(bp->b_dirtyend
@@ -3328,21 +3408,6 @@ again:
 
                if (retv == NFSERR_STALEWRITEVERF)
                        nfs_clearcommit(vp->v_mount);
-                        
-        for (i = 0; i < buplpos; i++) {
-                       /*
-                        * Before the VOP_BWRITE and biodone(ASYNC)/brelse, we have to undo
-                        * holding the vm page or we we will deadlock on another vm_fault_list_request.
-                        * Here's a convenient place to put it. 
-                        * Better if we could hold it by setting the PAGELIST flag and kernel_upl_map
-                        * as does nfs_writebp. Then normal biodones and brelse will clean it up and 
-                        * we can avoid this abort. For now make minimal changes.
-                        */
-                       err = ubc_upl_abort(upls[i], NULL); 
-                       if (err)
-                               printf("nfs_flush: kernel_upl_abort %d\n", err);
-               }
-
 
                /*
                 * Now, either mark the blocks I/O done or mark the
@@ -3350,31 +3415,31 @@ again:
                 * succeeded.
                 */
                for (i = 0; i < bvecpos; i++) {
-                        
                        bp = bvec[i];
+                       FSDBG(523, bp, retv, bp->b_flags, 0);
                        CLR(bp->b_flags, (B_NEEDCOMMIT | B_WRITEINPROG));
                        if (retv) {
-                           brelse(bp);
+                               brelse(bp);
                        } else {
-                           vp->v_numoutput++;
-                           SET(bp->b_flags, B_ASYNC);
-                           s = splbio();
-                           CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
-                           bp->b_dirtyoff = bp->b_dirtyend = 0;
-                           reassignbuf(bp, vp);
-                           splx(s);
-                           biodone(bp);
+                               s = splbio();
+                               vp->v_numoutput++;
+                               SET(bp->b_flags, B_ASYNC);
+                               CLR(bp->b_flags,
+                                   (B_READ|B_DONE|B_ERROR|B_DELWRI));
+                               bp->b_dirtyoff = bp->b_dirtyend = 0;
+                               reassignbuf(bp, vp);
+                               splx(s);
+                               biodone(bp);
                        }
                }
 
        }
-
        /*
-        * Start/do any write(s) that are required.
-        * There is a window here where B_BUSY protects the buffer. The vm pages have been
-        * freed up, yet B_BUSY is set. Don't think you will hit any busy/incore problems while
-        * we sleep, but not absolutely sure. Keep an eye on it. Otherwise we will have to hold
-        * vm page across this locked. - EKN
+        * Start/do any write(s) that are required.  There is a window here
+        * where B_BUSY protects the buffer. The vm pages have been freed up,
+        * yet B_BUSY is set. Don't think you will hit any busy/incore problems
+        * while we sleep, but not absolutely sure. Keep an eye on it. Otherwise
+        * we will have to hold vm page across this locked. - EKN
         */
 loop:
        if (current_thread_aborted()) {
@@ -3385,34 +3450,36 @@ loop:
        for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
                nbp = bp->b_vnbufs.le_next;
                if (ISSET(bp->b_flags, B_BUSY)) {
+                       FSDBG(524, bp, waitfor, passone, bp->b_flags);
                        if (waitfor != MNT_WAIT || passone)
                                continue;
                        SET(bp->b_flags, B_WANTED);
                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
-                               "nfsfsync", slptimeo);
+                                      "nfsfsync", slptimeo);
                        splx(s);
                        if (error) {
-                           if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
-                               error = EINTR;
-                               goto done;
-                           }
-                           if (slpflag == PCATCH) {
-                               slpflag = 0;
-                               slptimeo = 2 * hz;
-                           }
+                               if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+                                       error = EINTR;
+                                       goto done;
+                               }
+                               if (slpflag == PCATCH) {
+                                       slpflag = 0;
+                                       slptimeo = 2 * hz;
+                               }
                        }
                        goto loop;
                }
                if (!ISSET(bp->b_flags, B_DELWRI))
                        panic("nfs_fsync: not dirty");
+               FSDBG(525, bp, passone, commit, bp->b_flags);
                if ((passone || !commit) && ISSET(bp->b_flags, B_NEEDCOMMIT))
                        continue;
                bremfree(bp);
                if (passone || !commit)
-            SET(bp->b_flags, (B_BUSY|B_ASYNC));
+                       SET(bp->b_flags, B_BUSY|B_ASYNC);
                else
-            SET(bp->b_flags, (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT));
-
+                       SET(bp->b_flags,
+                           B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
                splx(s);
                VOP_BWRITE(bp);
                goto loop;
@@ -3442,15 +3509,15 @@ loop:
                        goto loop;
                }
        }
+       FSDBG(526, np->n_flag, np->n_error, 0, 0);
        if (np->n_flag & NWRITEERR) {
                error = np->n_error;
                np->n_flag &= ~NWRITEERR;
        }
 done:
+       FSDBG_BOT(517, vp, np, error, 0);
        if (bvec != NULL && bvec != bvec_on_stack)
                _FREE(bvec, M_TEMP);
-       if (upls != NULL && upls != (upl_t *) upls_on_stack)
-        _FREE(upls, M_TEMP);
        return (error);
 }
 
@@ -3732,51 +3799,31 @@ nfs_writebp(bp, force)
        splx(s);
         
         /* 
-         * Since the B_BUSY flag is set, we need to lock the page before doing nfs_commit.
-         * Otherwise we may block and get a busy incore pages during a vm pageout.
-         * Move the existing code up before the commit.
+         * Since the B_BUSY flag is set, we need to lock the page before doing
+         * nfs_commit.  Otherwise we may block and get a busy incore pages
+         * during a vm pageout.  Move the existing code up before the commit.
          */
-
-        if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp)) {
-    
-            if (!ISSET(bp->b_flags, B_PAGELIST)) {
-                               kret = ubc_create_upl(vp,
-                                                               ubc_blktooff(vp, bp->b_lblkno),
-                                                               bp->b_bufsize,
-                                                               &upl,
-                                                               &pl,
-                                                               UPL_PRECIOUS);
-                               if (kret != KERN_SUCCESS) {
-                                       panic("nfs_writebp: get pagelists failed with (%d)", kret);
-                               }
-                    
+        if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp) &&
+            !ISSET(bp->b_flags, B_PAGELIST)) {
+               kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno),
+                                     bp->b_bufsize, &upl, &pl, UPL_PRECIOUS);
+               if (kret != KERN_SUCCESS)
+                       panic("nfs_writebp: ubc_create_upl %d", kret);
 #ifdef UBC_DEBUG
-                               upl_ubc_alias_set(upl, ioaddr, 2);
+               upl_ubc_alias_set(upl, current_act(), 2);
 #endif /* UBC_DEBUG */
+               s = splbio();
+               bp->b_pagelist = upl;
+               SET(bp->b_flags, B_PAGELIST);
+               splx(s);
 
-                               s = splbio();
-
-                               bp->b_pagelist = upl;
-                               SET(bp->b_flags, B_PAGELIST);
-                               splx(s);
-                    
-                               kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
-                               if (kret != KERN_SUCCESS) {
-                                       panic("nfs_writebp: ubc_upl_map() failed with (%d)", kret);
-                               }
-                               if(bp->b_data == 0) 
-                                       panic("nfs_writebp: upl_map mapped 0");
-
-                               if (!upl_page_present(pl, 0)) {
-                                       /* 
-                                        * may be the page got paged out.
-                                        * let's just read it in. It is marked
-                                        * busy so we should not have any one
-                                        * yanking this page underneath the fileIO
-                                        */
-                                       panic("nfs_writebp: nopage");
-                               }
-               }
+               kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
+               if (kret != KERN_SUCCESS)
+                       panic("nfs_writebp: ubc_upl_map %d", kret);
+               if(bp->b_data == 0) 
+                       panic("nfs_writebp: ubc_upl_map mapped 0");
+               if (!upl_page_present(pl, 0)) /* even more paranoia */
+                       panic("nfs_writebp: nopage");
        }
 
        /*
@@ -4110,37 +4157,36 @@ nfs_pagein(ap)
        struct uio      auio;
        struct iovec    aiov;
        struct uio * uio = &auio;
-       int nocommit = flags & UPL_NOCOMMIT;
+       int nofreeupl = flags & UPL_NOCOMMIT;
 
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-                    (int)f_offset, size, pl, pl_offset, 0);
+       FSDBG(322, f_offset, size, pl, pl_offset);
+       if (pl == (upl_t)NULL)
+               panic("nfs_pagein: no upl");
 
        if (UBCINVALID(vp)) {
-#if DIAGNOSTIC
-               panic("nfs_pagein: invalid vp");
-#endif /* DIAGNOSTIC */
+               printf("nfs_pagein: invalid vnode 0x%x", (int)vp);
+               if (!nofreeupl)
+                       (void) ubc_upl_abort(pl, NULL); 
                return (EPERM);
        }
-
        UBCINFOCHECK("nfs_pagein", vp);
-       if(pl == (upl_t)NULL) {
-               panic("nfs_pagein: no upl");
-       }
 
-       cred = ubc_getcred(vp);
-       if (cred == NOCRED)
-               cred = ap->a_cred;
-
-       if (size <= 0)
+       if (size <= 0) {
+               printf("nfs_pagein: invalid size %d", size);
+               if (!nofreeupl)
+                       (void) ubc_upl_abort(pl, NULL); 
                return (EINVAL);
-
-       if (f_offset < 0 || f_offset >= np->n_size 
-                                       || (f_offset & PAGE_MASK_64)) {
-               if (!nocommit)
+       }
+       if (f_offset < 0 || f_offset >= np->n_size ||
+           (f_offset & PAGE_MASK_64)) {
+               if (!nofreeupl)
                        ubc_upl_abort_range(pl, pl_offset, size, 
                                UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
                return (EINVAL);
        }
+       cred = ubc_getcred(vp);
+       if (cred == NOCRED)
+               cred = ap->a_cred;
 
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
@@ -4149,7 +4195,6 @@ nfs_pagein(ap)
        auio.uio_rw = UIO_READ;
        auio.uio_procp = NULL;
 
-
        if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
                (void)nfs_fsinfo(nmp, vp, cred, p);
        biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
@@ -4157,59 +4202,6 @@ nfs_pagein(ap)
        if (biosize & PAGE_MASK)
                panic("nfs_pagein(%x): biosize not page aligned", biosize);
 
-#if 0 /* Why bother? */
-/* DO NOT BOTHER WITH "approximately maintained cache consistency" */
-/* Does not make sense in paging paths -- Umesh*/
-       /*
-        * For nfs, cache consistency can only be maintained approximately.
-        * Although RFC1094 does not specify the criteria, the following is
-        * believed to be compatible with the reference port.
-        * For nqnfs, full cache consistency is maintained within the loop.
-        * For nfs:
-        * If the file's modify time on the server has changed since the
-        * last read rpc or you have written to the file,
-        * you may have lost data cache consistency with the
-        * server, so flush all of the file's data out of the cache.
-        * Then force a getattr rpc to ensure that you have up to date
-        * attributes.
-        * NB: This implies that cache data can be read when up to
-        * NFS_ATTRTIMEO seconds out of date. If you find that you need current
-        * attributes this could be forced by setting n_attrstamp to 0 before
-        * the VOP_GETATTR() call.
-        */
-       if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
-               if (np->n_flag & NMODIFIED) {
-                       np->n_attrstamp = 0;
-                       error = VOP_GETATTR(vp, &vattr, cred, p);
-                       if (error) {
-                               if (!nocommit)
-                                       ubc_upl_abort_range(pl, pl_offset, size, 
-                                                       UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-                               return (error);
-                       }
-                       np->n_mtime = vattr.va_mtime.tv_sec;
-               } else {
-                       error = VOP_GETATTR(vp, &vattr, cred, p);
-                       if (error){
-                               if (!nocommit)
-                                       ubc_upl_abort_range(pl, pl_offset, size,  
-                                                       UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-                               return (error);
-                       }
-                       if (np->n_mtime != vattr.va_mtime.tv_sec) {
-                               error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-                               if (error){
-                                       if (!nocommit)
-                                               ubc_upl_abort_range(pl, pl_offset, size, 
-                                                               UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-                                       return (error);
-                               }
-                               np->n_mtime = vattr.va_mtime.tv_sec;
-                       }
-               }
-       }
-#endif 0 /* Why bother? */
-
        ubc_upl_map(pl, &ioaddr);
        ioaddr += pl_offset;
        xsize = size;
@@ -4219,102 +4211,43 @@ nfs_pagein(ap)
                aiov.iov_len  = uio->uio_resid;
                aiov.iov_base = (caddr_t)ioaddr;
 
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-                       (int)uio->uio_offset, uio->uio_resid, ioaddr, xsize, 0);
-
-#warning nfs_pagein does not support NQNFS yet.
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
-               /*
-                * Get a valid lease. If cached data is stale, flush it.
-                */
-               if (nmp->nm_flag & NFSMNT_NQNFS) {
-                       if (NQNFS_CKINVALID(vp, np, ND_READ)) {
-                               do {
-                                       error = nqnfs_getlease(vp, ND_READ, cred, p);
-                               } while (error == NQNFS_EXPIRED);
-                               if (error){
-                                       ubc_upl_unmap(pl);
-                                       if (!nocommit)
-                                               ubc_upl_abort_range(pl, pl_offset, size,
-                                                               UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-
-                                       return (error);
-                               }
-                               if (np->n_lrev != np->n_brev ||
-                                       (np->n_flag & NQNFSNONCACHE)) {
-                                       error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-                                       if (error) {
-                                               ubc_upl_unmap(pl);
-                                       if (!nocommit)
-                                               ubc_upl_abort_range(pl, pl_offset, size,
-                                                               UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-                                               return (error);
-                                       }
-                                       np->n_brev = np->n_lrev;
-                               }
-                       }
-               }
-#endif 0 /* why bother? */
-
-               if (np->n_flag & NQNFSNONCACHE) {
-                       error = nfs_readrpc(vp, uio, cred);
-                       ubc_upl_unmap(pl);
-
-                       if (!nocommit) {
-                               if(error) 
-                                       ubc_upl_abort_range(pl, pl_offset, size,
-                                               UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
-                               else
-                                       ubc_upl_commit_range(pl, pl_offset, size, 
-                                               UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
-                       }
-                       return (error);
-               }
-
+               FSDBG(322, uio->uio_offset, uio->uio_resid, ioaddr, xsize);
+#warning our nfs_pagein does not support NQNFS
                /*
                 * With UBC we get here only when the file data is not in the VM
                 * page cache, so go ahead and read in.
                 */
 #ifdef UBC_DEBUG
-               upl_ubc_alias_set(pl, ioaddr, 2);
+               upl_ubc_alias_set(pl, current_act(), 2);
 #endif /* UBC_DEBUG */
                nfsstats.pageins++;
                error = nfs_readrpc(vp, uio, cred);
 
                if (!error) {
-                       int zoff;
-                       int zcnt;
-
                        if (uio->uio_resid) {
                                /*
-                                * If uio_resid > 0, there is a hole in the file and
-                                * no writes after the hole have been pushed to
-                                * the server yet... or we're at the EOF
+                                * If uio_resid > 0, there is a hole in the file
+                                * and no writes after the hole have been pushed
+                                * to the server yet... or we're at the EOF
                                 * Just zero fill the rest of the valid area.
                                 */
-                               zcnt = uio->uio_resid;
-                               zoff = biosize - zcnt;
+                               int zcnt = uio->uio_resid;
+                               int zoff = biosize - zcnt;
                                bzero((char *)ioaddr + zoff, zcnt);
 
-                               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 324)) | DBG_FUNC_NONE,
-                                       (int)uio->uio_offset, zoff, zcnt, ioaddr, 0);
-
+                               FSDBG(324, uio->uio_offset, zoff, zcnt, ioaddr);
                                uio->uio_offset += zcnt;
                        }
                        ioaddr += biosize;      
                        xsize  -= biosize;
                } else
-                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
-                               (int)uio->uio_offset, uio->uio_resid, error, -1, 0);
-
+                       FSDBG(322, uio->uio_offset, uio->uio_resid, error, -1);
                if (p && (vp->v_flag & VTEXT) &&
-                               (((nmp->nm_flag & NFSMNT_NQNFS) &&
-                               NQNFS_CKINVALID(vp, np, ND_READ) &&
-                               np->n_lrev != np->n_brev) ||
-                               (!(nmp->nm_flag & NFSMNT_NQNFS) &&
-                               np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { 
+                   ((nmp->nm_flag & NFSMNT_NQNFS &&
+                     NQNFS_CKINVALID(vp, np, ND_READ) &&
+                     np->n_lrev != np->n_brev) ||
+                    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+                     np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
                        uprintf("Process killed due to text file modification\n");
                        psignal(p, SIGKILL);
                        p->p_flag |= P_NOSWAP;
@@ -4324,20 +4257,20 @@ nfs_pagein(ap)
 
        ubc_upl_unmap(pl);
 
-       if (!nocommit) {
+       if (!nofreeupl) {
                if (error) 
                        ubc_upl_abort_range(pl, pl_offset, size, 
-                                       UPL_ABORT_ERROR |  UPL_ABORT_FREE_ON_EMPTY);
+                                           UPL_ABORT_ERROR |
+                                           UPL_ABORT_FREE_ON_EMPTY);
                else
                        ubc_upl_commit_range(pl, pl_offset, size,
-                                       UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
+                                            UPL_COMMIT_CLEAR_DIRTY |
+                                            UPL_COMMIT_FREE_ON_EMPTY);
        }
-
        return (error);
 }
 
 
-
 /*
  * Vnode op for pageout using UPL
  * Derived from nfs_write()
@@ -4369,33 +4302,34 @@ nfs_pageout(ap)
        struct buf *bp;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        daddr_t lbn;
-       int bufsize;
        int n = 0, on, error = 0, iomode, must_commit, s;
        off_t off;
        vm_offset_t ioaddr;
        struct uio      auio;
        struct iovec    aiov;
        struct uio * uio = &auio;
-       int nocommit = flags & UPL_NOCOMMIT;
+       int nofreeupl = flags & UPL_NOCOMMIT;
        int iosize;
        int pgsize;
 
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 323)) | DBG_FUNC_NONE,
-               (int)f_offset, size, pl, pl_offset, 0);
+       FSDBG(323, f_offset, size, pl, pl_offset);
+
+       if (pl == (upl_t)NULL)
+               panic("nfs_pageout: no upl");
 
        if (UBCINVALID(vp)) {
-#if DIAGNOSTIC
-               panic("nfs_pageout: invalid vnode");
-#endif
+               printf("nfs_pageout: invalid vnode 0x%x", (int)vp);
+               if (!nofreeupl)
+                       (void) ubc_upl_abort(pl, NULL); 
                return (EIO);
        }
        UBCINFOCHECK("nfs_pageout", vp);
 
-       if (size <= 0)
+       if (size <= 0) {
+               printf("nfs_pageout: invalid size %d", size);
+               if (!nofreeupl)
+                       (void) ubc_upl_abort(pl, NULL); 
                return (EINVAL);
-
-       if (pl == (upl_t)NULL) {
-               panic("nfs_pageout: no upl");
        }
 
        /*
@@ -4408,7 +4342,6 @@ nfs_pageout(ap)
        if (biosize & PAGE_MASK)
                panic("nfs_pageout(%x): biosize not page aligned", biosize);
 
-
        /*
         * Check to see whether the buffer is incore
         * If incore and not busy invalidate it from the cache
@@ -4419,15 +4352,15 @@ nfs_pageout(ap)
        lbn = f_offset / PAGE_SIZE; /* to match the size getblk uses */
         
        for (iosize = size; iosize > 0; iosize -= PAGE_SIZE, lbn++) {
-
                s = splbio();
                if (bp = incore(vp, lbn)) {
+                       FSDBG(323, lbn*PAGE_SIZE, 1, bp, bp->b_flags);
                        if (ISSET(bp->b_flags, B_BUSY)) {
-                               /* don't panic incore. just tell vm we are busy */
-                               (void) ubc_upl_abort(pl, NULL); 
+                               /* no panic. just tell vm we are busy */
+                               if (!nofreeupl)
+                                       (void) ubc_upl_abort(pl, NULL); 
                                return(EBUSY);
-                       };
-
+                       }
                        bremfree(bp);
                        SET(bp->b_flags, (B_BUSY | B_INVAL));
                        brelse(bp);
@@ -4441,23 +4374,25 @@ nfs_pageout(ap)
 
        if (np->n_flag & NWRITEERR) {
                np->n_flag &= ~NWRITEERR;
-               if (!nocommit)
-                       ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
+               if (!nofreeupl)
+                       ubc_upl_abort_range(pl, pl_offset, size,
+                                           UPL_ABORT_FREE_ON_EMPTY);
                return (np->n_error);
        }
        if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
                (void)nfs_fsinfo(nmp, vp, cred, p);
 
        if (f_offset < 0 || f_offset >= np->n_size ||
-          (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
-               if (!nocommit)
-                       ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
+          f_offset & PAGE_MASK_64 || size & PAGE_MASK) {
+               if (!nofreeupl)
+                       ubc_upl_abort_range(pl, pl_offset, size,
+                                           UPL_ABORT_FREE_ON_EMPTY);
                return (EINVAL);
        }
 
        ubc_upl_map(pl, &ioaddr);
 
-       if ((f_offset + size) > np->n_size)
+       if (f_offset + size > np->n_size)
                iosize = np->n_size - f_offset;
        else
                iosize = size;
@@ -4465,9 +4400,10 @@ nfs_pageout(ap)
        pgsize = (iosize + (PAGE_SIZE - 1)) & ~PAGE_MASK;
 
        if (size > pgsize) {
-               if (!nocommit)
-                       ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
-                                       UPL_ABORT_FREE_ON_EMPTY);
+               if (!nofreeupl)
+                       ubc_upl_abort_range(pl, pl_offset + pgsize,
+                                           size - pgsize,
+                                           UPL_ABORT_FREE_ON_EMPTY);
        }
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
@@ -4479,158 +4415,41 @@ nfs_pageout(ap)
 
        aiov.iov_len = iosize;
        aiov.iov_base = (caddr_t)ioaddr + pl_offset;
-
        /* 
         * check for partial page and clear the
         * contents past end of the file before
         * releasing it in the VM page cache
         */
-       if ((f_offset < np->n_size) && (f_offset + size) > np->n_size) {
+       if (f_offset < np->n_size && f_offset + size > np->n_size) {
                size_t io = np->n_size - f_offset;
 
                bzero((caddr_t)(ioaddr + pl_offset + io), size - io);
 
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 321)) | DBG_FUNC_NONE,
-                       (int)np->n_size, (int)f_offset, (int)f_offset + io, size - io, 0);
+               FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
        }
 
        do {
-
-#warning nfs_pageout does not support NQNFS yet.
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
-
-               /*
-                * Check for a valid write lease.
-                */
-               if ((nmp->nm_flag & NFSMNT_NQNFS) &&
-                   NQNFS_CKINVALID(vp, np, ND_WRITE)) {
-                       do {
-                               error = nqnfs_getlease(vp, ND_WRITE, cred, p);
-                       } while (error == NQNFS_EXPIRED);
-                       if (error) {
-                               ubc_upl_unmap(pl);
-                               if (!nocommit)
-                                       ubc_upl_abort_range(pl, pl_offset, size, 
-                                                       UPL_ABORT_FREE_ON_EMPTY);
-                               return (error);
-                       }
-                       if (np->n_lrev != np->n_brev ||
-                           (np->n_flag & NQNFSNONCACHE)) {
-                               error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-                               if (error) {
-                                       ubc_upl_unmap(pl);
-                                       if (!nocommit)
-                                               ubc_upl_abort_range(pl, pl_offset, size, 
-                                                               UPL_ABORT_FREE_ON_EMPTY);
-                                       return (error);
-                               }
-                               np->n_brev = np->n_lrev;
-                       }
-               }
-#endif 0 /* why bother? */
-
-               if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
-                       iomode = NFSV3WRITE_FILESYNC;
-                       error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
-                       if (must_commit)
-                               nfs_clearcommit(vp->v_mount);
-                       ubc_upl_unmap(pl);
-                        
-                       /* copied from non-nqnfs case below. see there for comments */
-                       if (!nocommit) {
-                               if (error) {
-                                       int abortflags; 
-                                       short action = nfs_pageouterrorhandler(error);
-                                       
-                                       switch (action) {
-                                               case DUMP:
-                                                       abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
-                                                       break;
-                                               case DUMPANDLOG:
-                                                       abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
-                                                       if ((error <= ELAST) && (errorcount[error] % 100 == 0)) 
-                                                               printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
-                                                       errorcount[error]++;
-                                                       break;
-                                               case RETRY:
-                                                       abortflags = UPL_ABORT_FREE_ON_EMPTY;
-                                                       break;
-                                               case RETRYWITHSLEEP:
-                                                       abortflags = UPL_ABORT_FREE_ON_EMPTY;
-                                                       (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */
-                                                       break;
-                                               case SEVER: /* not implemented */
-                                               default:
-                                                       printf("nfs_pageout: action %d not expected\n", action);
-                                                       break;
-                                       }
-                                               
-                                       ubc_upl_abort_range(pl, pl_offset, size, abortflags);
-                                       /* return error in all cases above */
-
-                               } else
-                                       ubc_upl_commit_range(pl, 
-                                               pl_offset, size, 
-                                               UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
-                       }
-                       return (error); /* note this early return */
-               }
-
+#warning our nfs_pageout does not support NQNFS
                nfsstats.pageouts++;
                lbn = uio->uio_offset / biosize;
                on = uio->uio_offset & (biosize-1);
                n = min((unsigned)(biosize - on), uio->uio_resid);
 again:
-               bufsize = biosize;
 #if 0
+               /* (removed for UBC) */
+               bufsize = biosize;
                if ((lbn + 1) * biosize > np->n_size) {
                        bufsize = np->n_size - lbn * biosize;
                        bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
                }
 #endif
                vp->v_numoutput++;
-
-               np->n_flag |= NMODIFIED;
-
-#if 0 /* why bother? */
-/* NO RESOURCES TO FIX NQNFS CASE */
-/* We need to deal with this later -- Umesh */
-               /*
-                * Check for valid write lease and get one as required.
-                * In case getblk() and/or bwrite() delayed us.
-                */
-               if ((nmp->nm_flag & NFSMNT_NQNFS) &&
-                   NQNFS_CKINVALID(vp, np, ND_WRITE)) {
-                       do {
-                               error = nqnfs_getlease(vp, ND_WRITE, cred, p);
-                       } while (error == NQNFS_EXPIRED);
-                       if (error)
-                               goto cleanup;
-
-                       if (np->n_lrev != np->n_brev ||
-                           (np->n_flag & NQNFSNONCACHE)) {
-                                       error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
-                                       if (error) {
-                                               ubc_upl_unmap(pl);
-                                               if (!nocommit)
-                                                       ubc_upl_abort_range(pl, pl_offset, size,
-                                                                       UPL_ABORT_FREE_ON_EMPTY);
-
-                                               return (error);
-                                       }
-                                       np->n_brev = np->n_lrev;
-                                       goto again;
-                       }
-               }
-#endif 0 /* why bother? */
-
+               /* NMODIFIED would be set here if doing unstable writes */
                iomode = NFSV3WRITE_FILESYNC;
                error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
                if (must_commit)
                        nfs_clearcommit(vp->v_mount);
-               vp->v_numoutput--;
+               vpwakeup(vp);
 
                if (error)
                        goto cleanup;
@@ -4656,8 +4475,8 @@ cleanup:
         * the server is telling us that the file is no longer the same. We 
         * would not want to keep paging out to that. We also saw some 151 
         * errors from Auspex server and NFSv3 can return errors higher than
-        * ELAST. Those along with NFS known server errors we will "dump" from vm. 
-        * Errors we don't expect to occur, we dump and log for further
+        * ELAST. Those along with NFS known server errors we will "dump" from
+        * vm.  Errors we don't expect to occur, we dump and log for further
         * analysis. Errors that could be transient, networking ones,
         * we let vm "retry". Lastly, errors that we retry, but may have potential
         * to storm the network, we "retrywithsleep". "sever" will be used in
@@ -4666,10 +4485,10 @@ cleanup:
         * error handling. Tweaking expected as more statistics are gathered.
         * Note, in the long run we may need another more robust solution to
         * have some kind of persistant store when the vm cannot dump nor keep
-        * retrying as a solution, but this would be a file architectural change.
+        * retrying as a solution, but this would be a file architectural change
         */
          
-       if (!nocommit) { /* otherwise stacked file system has to handle this */
+       if (!nofreeupl) { /* otherwise stacked file system has to handle this */
                if (error) {
                        int abortflags; 
                        short action = nfs_pageouterrorhandler(error);
@@ -4680,7 +4499,8 @@ cleanup:
                                        break;
                                case DUMPANDLOG:
                                        abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
-                                       if ((error <= ELAST) && (errorcount[error] % 100 == 0)) 
+                                       if (error <= ELAST &&
+                                           (errorcount[error] % 100 == 0)) 
                                                printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
                                        errorcount[error]++;
                                        break;
@@ -4689,7 +4509,9 @@ cleanup:
                                        break;
                                case RETRYWITHSLEEP:
                                        abortflags = UPL_ABORT_FREE_ON_EMPTY;
-                                       (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */
+                                       /* pri unused. PSOCK for placeholder. */
+                                       (void) tsleep(&lbolt, PSOCK,
+                                                     "nfspageout", 0);
                                        break;
                                case SEVER: /* not implemented */
                                default:
@@ -4702,7 +4524,8 @@ cleanup:
                        
                } else 
                        ubc_upl_commit_range(pl, pl_offset, pgsize,
-                               UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
+                                            UPL_COMMIT_CLEAR_DIRTY |
+                                            UPL_COMMIT_FREE_ON_EMPTY);
        }
        return (error);
 }
@@ -4726,7 +4549,6 @@ nfs_blktooff(ap)
        return (0);
 }
 
-/* Blktooff derives file offset given a logical block number */
 static int
 nfs_offtoblk(ap)
        struct vop_offtoblk_args /* {
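The pagein and pageout hunks above also rename the local nocommit flag to
nofreeupl, which states the contract more precisely: when the caller passes
UPL_NOCOMMIT it keeps ownership of the upl, so the vnode op must return
without aborting or committing it, even on the early error paths.  A minimal
sketch of that contract, using a hypothetical helper name rather than code
from this commit:

	/*
	 * Hypothetical helper showing the UPL_NOCOMMIT convention followed
	 * by nfs_pagein()/nfs_pageout() above: dispose of the upl only when
	 * the caller has not asked to keep it.
	 */
	static void
	nfs_pageop_finish_sketch(upl_t pl, vm_offset_t pl_offset,
				 vm_size_t size, int flags, int error)
	{
		if (flags & UPL_NOCOMMIT)	/* caller owns the upl */
			return;
		if (error)
			ubc_upl_abort_range(pl, pl_offset, size,
				UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
		else
			ubc_upl_commit_range(pl, pl_offset, size,
				UPL_COMMIT_CLEAR_DIRTY |
				UPL_COMMIT_FREE_ON_EMPTY);
	}
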
index 75f019ce0e5a051113765a4ffbb49a53aec77425..0f31ebb57eb8fa5067ca82dd807e3cbb4488dad5 100644 (file)
@@ -170,7 +170,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
                bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \
                }
 
-#define nfsm_mtofh(d, v, v3, f) \
+#define nfsm_mtofh(d, v, v3, f, x) \
                { struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \
                if (v3) { \
                        nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \
@@ -195,7 +195,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
                                nfsm_adv(NFSX_V3FATTR); \
                } \
                if (f) \
-                       nfsm_loadattr((v), (struct vattr *)0); \
+                       nfsm_loadattr((v), (struct vattr *)0, (x)); \
                }
 
 #define nfsm_getfh(f, s, v3) \
@@ -211,26 +211,29 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
                        (s) = NFSX_V2FH; \
                nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); }
 
-#define        nfsm_loadattr(v, a) \
+#define        nfsm_loadattr(v, a, x) \
                { struct vnode *ttvp = (v); \
-               if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a)))) { \
+               if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a), 0, \
+                                           (x)))) { \
                        error = t1; \
                        m_freem(mrep); \
                        goto nfsmout; \
                } \
                (v) = ttvp; }
 
-#define        nfsm_postop_attr(v, f) \
+#define        nfsm_postop_attr(v, f, x) \
                { struct vnode *ttvp = (v); \
                nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \
                if (((f) = fxdr_unsigned(int, *tl))) { \
                        if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \
-                               (struct vattr *)0))) { \
+                                       (struct vattr *)0, 1, (x)))) { \
                                error = t1; \
                                (f) = 0; \
                                m_freem(mrep); \
                                goto nfsmout; \
                        } \
+                       if (*(x) == 0) \
+                               (f) = 0; \
                        (v) = ttvp; \
                } }
 
@@ -238,7 +241,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
 #define NFSV3_WCCRATTR 0
 #define NFSV3_WCCCHK   1
 
-#define        nfsm_wcc_data(v, f) \
+#define        nfsm_wcc_data(v, f, x) \
                { int ttattrf, ttretf = 0; \
                nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \
                if (*tl == nfs_true) { \
@@ -247,7 +250,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
                                ttretf = (VTONFS(v)->n_mtime == \
                                        fxdr_unsigned(u_long, *(tl + 2))); \
                } \
-               nfsm_postop_attr((v), ttattrf); \
+               nfsm_postop_attr((v), ttattrf, (x)); \
                if (f) { \
                        (f) = ttretf; \
                } else { \
@@ -324,11 +327,11 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid,
 * m_freem(mrep). Wondering if some of our freeing problems could be
 * due to nfsv3 calling nfsm_reqdone unlike nfsv2. Separate problem.
 */
-#define        nfsm_request(v, t, p, c)        \
+#define        nfsm_request(v, t, p, c, x)     \
                 { \
                 int nfsv3 = (VFSTONFS((v)->v_mount))->nm_flag & NFSMNT_NFSV3; \
                if ((error = nfs_request((v), mreq, (t), (p), \
-                  (c), &mrep, &md, &dpos))) { \
+                  (c), &mrep, &md, &dpos, (x)))) { \
                        if (error & NFSERR_RETERR) \
                                error &= ~NFSERR_RETERR; \
                        else \
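The nfsm_* macros above gain a new argument, x, a pointer to a 64-bit transaction id that is threaded from nfsm_request() down into nfs_loadattrcache(); together with the n_xid field added to the nfsnode in the next hunk, this lets the client ignore attributes carried by a reply older than one it has already processed, and nfsm_postop_attr clears its "attributes present" flag when *(x) comes back zero. A minimal sketch of that gating idea, assuming the comparison happens inside nfs_loadattrcache(); the helper name and body are illustrative, not the actual xnu code:

        /*
         * Sketch only: accept attributes only if the reply's xid is at
         * least as new as the last one that updated this nfsnode.
         */
        static int
        load_attr_if_newer(struct nfsnode *np, u_int64_t *xidp)
        {
                if (*xidp < np->n_xid) {
                        *xidp = 0;      /* caller (nfsm_postop_attr) clears its flag */
                        return (0);     /* silently ignore the stale attributes */
                }
                np->n_xid = *xidp;
                /* ... decode and cache the attributes here ... */
                return (0);
        }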
index 829508a1430d5cd3bc1cd9b6163421c6f2a4e701..cf37ec7959d85626e8529fcbd8b7a80ef3504ceb 100644 (file)
@@ -66,6 +66,8 @@
 #ifndef _NFS_NFS_H_
 #include <nfs/nfs.h>
 #endif
+#include <sys/lock.h>
+
 
 /*
  * Silly rename structure that hangs off the nfsnode until the name
@@ -108,6 +110,7 @@ struct nfsdmap {
  *     be well aligned and, therefore, tightly packed.
  */
 struct nfsnode {
+       struct lock__bsd__      n_lock; /* the vnode lock */
        LIST_ENTRY(nfsnode)     n_hash;         /* Hash chain */
        CIRCLEQ_ENTRY(nfsnode)  n_timer;        /* Nqnfs timer chain */
        u_quad_t                n_size;         /* Current size of file */
@@ -140,6 +143,7 @@ struct nfsnode {
        short                   n_fhsize;       /* size in bytes, of fh */
        short                   n_flag;         /* Flag for locking.. */
        nfsfh_t                 n_fh;           /* Small File Handle */
+       u_int64_t               n_xid;          /* last xid to loadattr */
 };
 
 #define n_atim         n_un1.nf_atim
@@ -179,6 +183,7 @@ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
 extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 
 #if defined(KERNEL)
+
 typedef int     vop_t __P((void *));
 extern vop_t   **fifo_nfsv2nodeop_p;
 extern vop_t   **nfsv2_vnodeop_p;
@@ -196,9 +201,10 @@ int        nqnfs_vop_lease_check __P((struct vop_lease_args *));
 int    nfs_abortop __P((struct vop_abortop_args *));
 int    nfs_inactive __P((struct vop_inactive_args *));
 int    nfs_reclaim __P((struct vop_reclaim_args *));
-#define nfs_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
-#define nfs_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
-#define nfs_islocked ((int (*) __P((struct vop_islocked_args *)))vop_noislocked)
+int nfs_lock __P((struct vop_lock_args *));
+int nfs_unlock __P((struct vop_unlock_args *));
+int nfs_islocked __P((struct vop_islocked_args *));
+
 #define nfs_reallocblks \
        ((int (*) __P((struct  vop_reallocblks_args *)))eopnotsupp)
 
index 9638bc398cfa8f12809a3db78790e26ae0337923..ba1bf3bcd8355aead3315c7da8d2468a1231bc8d 100644 (file)
@@ -286,6 +286,7 @@ extern simple_lock_data_t   mbuf_slock;
        _MINTGET(m, type);                                              \
        if (m) {                                                        \
                (m)->m_next = (m)->m_nextpkt = 0;                       \
+               (m)->m_len = 0;                                         \
                (m)->m_type = (type);                                   \
                (m)->m_data = (m)->m_dat;                               \
                (m)->m_flags = 0;                                       \
@@ -300,6 +301,7 @@ extern simple_lock_data_t   mbuf_slock;
                (m)->m_type = (type);                                   \
                (m)->m_data = (m)->m_pktdat;                            \
                (m)->m_flags = M_PKTHDR;                                \
+               (m)->m_pkthdr.len = 0;                                  \
                (m)->m_pkthdr.rcvif = NULL;                             \
                (m)->m_pkthdr.header = NULL;                            \
                (m)->m_pkthdr.csum_flags = 0;                           \
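MGET() and MGETHDR() now start every mbuf with m_len = 0 (and m_pkthdr.len = 0 for packet headers), so a caller that fills the buffer incrementally no longer inherits stale lengths from a recycled mbuf. A usage sketch, assuming a hypothetical buf/len pair to copy in:

        struct mbuf *m;

        MGETHDR(m, M_WAIT, MT_DATA);
        if (m != NULL) {
                /* m_len and m_pkthdr.len are guaranteed 0 here after this change */
                m->m_len = min(len, MHLEN);             /* len: hypothetical */
                bcopy(buf, mtod(m, caddr_t), m->m_len); /* buf: hypothetical */
                m->m_pkthdr.len = m->m_len;
        }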
index ea004e531abe107c56080e1cde3b3630f105ff69..b51ee08244ffd6d11e49ad45796a1a55d97f2971 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -157,8 +157,9 @@ ufs_reclaim(vp, p)
         */
        cache_purge(vp);
        if (ip->i_devvp) {
-               vrele(ip->i_devvp);
-               ip->i_devvp = 0;
+               struct vnode *tvp = ip->i_devvp;
+               ip->i_devvp = NULL;
+               vrele(tvp);
        }
 #if QUOTA
        for (i = 0; i < MAXQUOTAS; i++) {
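ufs_reclaim() now clears i_devvp before calling vrele() on it, instead of the other way around. The same detach-then-release ordering shows up again in the checkdirs/fchdir/chdir/chroot hunks and the TIOCSCTTY hunk below: as sketched next, nothing that runs underneath vrele() (inactive or reclaim processing, or another thread peeking at the shared field) can then see a pointer whose reference is already being dropped. The helper is hypothetical, purely to illustrate the ordering; the commit open-codes it at each site.

        static void
        drop_vnode_ref(struct vnode **vpp)
        {
                struct vnode *tvp = *vpp;

                *vpp = NULL;            /* detach first ... */
                if (tvp != NULL)
                        vrele(tvp);     /* ... then drop the reference */
        }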
index 3e9b0a09badfe201681aefdc7aa49df9039afe53..5579b8c9c80a092ffa069b0b3f863fa91fccb3d7 100644 (file)
@@ -64,6 +64,7 @@
  *     @(#)vfs_bio.c   8.6 (Berkeley) 1/11/94
  */
 
+
 /*
  * Some references:
  *     Bach: The Design of the UNIX Operating System (Prentice Hall, 1986)
@@ -99,7 +100,7 @@ extern void bufq_balance_thread_init();
 extern void reassignbuf(struct buf *, struct vnode *);
 static struct buf *getnewbuf(int slpflag, int slptimeo, int *queue);
 
-extern int niobuf;             /* The number of IO buffer headers for cluster IO */
+extern int niobuf;     /* The number of IO buffer headers for cluster IO */
 int blaundrycnt;
 
 #if TRACE
@@ -632,7 +633,8 @@ brelse(bp)
        long whichq;
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_START,
-                    bp->b_lblkno * PAGE_SIZE, bp, bp->b_data, bp->b_flags, 0);
+                    bp->b_lblkno * PAGE_SIZE, (int)bp, (int)bp->b_data,
+                    bp->b_flags, 0);
 
        trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
 
@@ -674,7 +676,9 @@ brelse(bp)
                                        upl_flags = 0;
                                ubc_upl_abort(upl, upl_flags);
                        } else {
-                           if (ISSET(bp->b_flags, (B_DELWRI | B_WASDIRTY)))
+                           if (ISSET(bp->b_flags, B_NEEDCOMMIT))
+                                   upl_flags = UPL_COMMIT_CLEAR_DIRTY ;
+                           else if (ISSET(bp->b_flags, B_DELWRI | B_WASDIRTY))
                                        upl_flags = UPL_COMMIT_SET_DIRTY ;
                                else
                                    upl_flags = UPL_COMMIT_CLEAR_DIRTY ;
@@ -758,7 +762,7 @@ brelse(bp)
        splx(s);
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_END,
-                    bp, bp->b_data, bp->b_flags, 0, 0);
+                    (int)bp, (int)bp->b_data, bp->b_flags, 0, 0);
 }
 
 /*
@@ -791,7 +795,8 @@ incore(vp, blkno)
        return (0);
 }
 
-/* XXX FIXME -- Update the comment to reflect the UBC changes -- */
+
+/* XXX FIXME -- Update the comment to reflect the UBC changes (please) -- */
 /*
  * Get a block of requested size that is associated with
  * a given vnode and block offset. If it is found in the
@@ -889,8 +894,11 @@ start:
                                        SET(bp->b_flags, B_PAGELIST);
                                        bp->b_pagelist = upl;
 
-                                       if ( !upl_valid_page(pl, 0))
-                                               panic("getblk: incore buffer without valid page");
+                                       if (!upl_valid_page(pl, 0)) {
+                                               if (vp->v_tag != VT_NFS)
+                                                       panic("getblk: incore buffer without valid page");
+                                               CLR(bp->b_flags, B_CACHE);
+                                       }
 
                                        if (upl_dirty_page(pl, 0))
                                                SET(bp->b_flags, B_WASDIRTY);
@@ -1112,7 +1120,7 @@ start:
        }
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_END,
-                    bp, bp->b_data, bp->b_flags, 3, 0);
+                    (int)bp, (int)bp->b_data, bp->b_flags, 3, 0);
 
        return (bp);
 }
@@ -1639,7 +1647,7 @@ biodone(bp)
        funnel_state = thread_funnel_set(kernel_flock, TRUE);
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START,
-                    bp, bp->b_data, bp->b_flags, 0, 0);
+                    (int)bp, (int)bp->b_data, bp->b_flags, 0, 0);
 
        if (ISSET(bp->b_flags, B_DONE))
                panic("biodone already");
@@ -1664,7 +1672,7 @@ biodone(bp)
        }
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END,
-                    bp, bp->b_data, bp->b_flags, 0, 0);
+                    (int)bp, (int)bp->b_data, bp->b_flags, 0, 0);
 
        thread_funnel_set(kernel_flock, funnel_state);
 }
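Three things change in vfs_bio.c: brelse() commits pages backing a B_NEEDCOMMIT buffer with UPL_COMMIT_CLEAR_DIRTY rather than re-marking them dirty, getblk() no longer panics when an in-core NFS buffer's first UPL page is invalid (it clears B_CACHE instead), and the KERNEL_DEBUG pointer arguments are cast to int to match the trace macro. A simplified restatement of the new flag selection; the rationale in the comment is an interpretation, not text from the commit:

        static int
        choose_upl_commit_flags(struct buf *bp)
        {
                if (ISSET(bp->b_flags, B_NEEDCOMMIT))
                        /* presumably: data already sent (unstable NFS write),
                         * so don't make the page look dirty again */
                        return (UPL_COMMIT_CLEAR_DIRTY);
                if (ISSET(bp->b_flags, B_DELWRI | B_WASDIRTY))
                        return (UPL_COMMIT_SET_DIRTY);
                return (UPL_COMMIT_CLEAR_DIRTY);
        }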
index fd99cca8d7b2bd369a04235d49524ea7330f2601..be3a4d40c3b04eed2860e67c9a0a1700fdef69d1 100644 (file)
@@ -635,6 +635,21 @@ insmntque(vp, mp)
        simple_unlock(&mntvnode_slock);
 }
 
+__inline void
+vpwakeup(struct vnode *vp)
+{
+       if (vp) {
+               if (--vp->v_numoutput < 0)
+                       panic("vpwakeup: neg numoutput");
+               if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+                       if (vp->v_numoutput < 0)
+                               panic("vpwakeup: neg numoutput 2");
+                       vp->v_flag &= ~VBWAIT;
+                       wakeup((caddr_t)&vp->v_numoutput);
+               }
+       }
+}
+
 /*
  * Update outstanding I/O count and do wakeup if requested.
  */
@@ -645,16 +660,7 @@ vwakeup(bp)
        register struct vnode *vp;
 
        CLR(bp->b_flags, B_WRITEINPROG);
-       if (vp = bp->b_vp) {
-               if (--vp->v_numoutput < 0)
-                       panic("vwakeup: neg numoutput");
-               if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
-                       if (vp->v_numoutput < 0)
-                               panic("vwakeup: neg numoutput 2");
-                       vp->v_flag &= ~VBWAIT;
-                       wakeup((caddr_t)&vp->v_numoutput);
-               }
-       }
+       vpwakeup(bp->b_vp);
 }
 
 /*
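vwakeup()'s bookkeeping moves into a new exported inline, vpwakeup(), that takes the vnode directly. A hypothetical caller, to show why the split is useful: an I/O completion path that bumped v_numoutput itself and has no struct buf to hand to vwakeup() can still do the decrement and VBWAIT wakeup.

        static void
        pageout_iodone(struct vnode *vp)        /* hypothetical caller */
        {
                /* ... commit or abort the UPL for this I/O ... */
                vpwakeup(vp);   /* drop v_numoutput, wake VBWAIT sleepers */
        }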
index 42470db6969519101d811e8fb40b909e17638e5b..d70d27453b141152461ecde82ff110b0dcb09ed7 100644 (file)
@@ -357,6 +357,7 @@ checkdirs(olddp)
        struct filedesc *fdp;
        struct vnode *newdp;
        struct proc *p;
+       struct vnode *tvp;
 
        if (olddp->v_usecount == 1)
                return;
@@ -365,20 +366,23 @@ checkdirs(olddp)
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                fdp = p->p_fd;
                if (fdp->fd_cdir == olddp) {
-                       vrele(fdp->fd_cdir);
                        VREF(newdp);
+                       tvp = fdp->fd_cdir;
                        fdp->fd_cdir = newdp;
+                       vrele(tvp);
                }
                if (fdp->fd_rdir == olddp) {
-                       vrele(fdp->fd_rdir);
                        VREF(newdp);
+                       tvp = fdp->fd_rdir;
                        fdp->fd_rdir = newdp;
+                       vrele(tvp);
                }
        }
        if (rootvnode == olddp) {
-               vrele(rootvnode);
                VREF(newdp);
+               tvp = rootvnode;
                rootvnode = newdp;
+               vrele(tvp);
        }
        vput(newdp);
 }
@@ -775,7 +779,7 @@ fchdir(p, uap, retval)
        register_t *retval;
 {
        register struct filedesc *fdp = p->p_fd;
-       struct vnode *vp, *tdp;
+       struct vnode *vp, *tdp, *tvp;
        struct mount *mp;
        struct file *fp;
        int error;
@@ -804,8 +808,9 @@ fchdir(p, uap, retval)
                return (error);
        }
        VOP_UNLOCK(vp, 0, p);
-       vrele(fdp->fd_cdir);
+       tvp = fdp->fd_cdir;
        fdp->fd_cdir = vp;
+       vrele(tvp);
        return (0);
 }
 
@@ -825,13 +830,15 @@ chdir(p, uap, retval)
        register struct filedesc *fdp = p->p_fd;
        int error;
        struct nameidata nd;
+       struct vnode *tvp;
 
        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
            uap->path, p);
        if (error = change_dir(&nd, p))
                return (error);
-       vrele(fdp->fd_cdir);
+       tvp = fdp->fd_cdir;
        fdp->fd_cdir = nd.ni_vp;
+       vrele(tvp);
        return (0);
 }
 
@@ -851,6 +858,7 @@ chroot(p, uap, retval)
        register struct filedesc *fdp = p->p_fd;
        int error;
        struct nameidata nd;
+       struct vnode *tvp;
 
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
@@ -865,9 +873,10 @@ chroot(p, uap, retval)
                return (error);
        }
 
-       if (fdp->fd_rdir != NULL)
-               vrele(fdp->fd_rdir);
+       tvp = fdp->fd_rdir;
        fdp->fd_rdir = nd.ni_vp;
+       if (tvp != NULL)
+               vrele(tvp);
        return (0);
 }
 
index a16dfb16759b4278374f3a61f0d9fd9b486acd57..b351d715b3f799342a829ccc691e397a51aa3b43 100644 (file)
@@ -431,6 +431,7 @@ vn_ioctl(fp, com, data, p)
        register struct vnode *vp = ((struct vnode *)fp->f_data);
        struct vattr vattr;
        int error;
+       struct vnode *ttyvp;
 
        switch (vp->v_type) {
 
@@ -454,10 +455,11 @@ vn_ioctl(fp, com, data, p)
        case VBLK:
                error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
                if (error == 0 && com == TIOCSCTTY) {
-                       if (p->p_session->s_ttyvp)
-                               vrele(p->p_session->s_ttyvp);
-                       p->p_session->s_ttyvp = vp;
                        VREF(vp);
+                       ttyvp = p->p_session->s_ttyvp;
+                       p->p_session->s_ttyvp = vp;
+                       if (ttyvp)
+                               vrele(ttyvp);
                }
                return (error);
        }
index dede2616db02e947c38ee72769847a12df19671c..5cdbf447fd56c9f61a528de33d8c8410333c63cc 100644 (file)
@@ -89,22 +89,17 @@ vnode_pageout(struct vnode *vp,
        int             result = PAGER_SUCCESS;
        struct proc     *p = current_proc();
        int             error = 0;
-       int vp_size = 0;
        int blkno=0, s;
        int cnt, isize;
        int pg_index;
        int offset;
        struct buf *bp;
        boolean_t       funnel_state;
-       int haveupl=0;
        upl_page_info_t *pl;
        upl_t vpupl = NULL;
 
        funnel_state = thread_funnel_set(kernel_flock, TRUE);
 
-       if (upl != (upl_t)NULL) {
-               haveupl = 1;
-       }
        isize = (int)size;
 
        if (isize < 0)
@@ -117,54 +112,44 @@ vnode_pageout(struct vnode *vp,
        if (UBCINVALID(vp)) {
                result = PAGER_ERROR;
                error  = PAGER_ERROR;
+               if (upl && !(flags & UPL_NOCOMMIT))
+                       ubc_upl_abort(upl, 0);
                goto out;
        }
-       if (haveupl) {
+       if (upl) {
                /*
-                * This is a pageout form the Default pager,
+                * This is a pageout from the Default pager,
                 * just go ahead and call VOP_PAGEOUT
                 */
                dp_pgouts++;
-               if (error = VOP_PAGEOUT(vp, upl, upl_offset,
-                        (off_t)f_offset,(size_t)size, p->p_ucred, flags)) {
-                       result = PAGER_ERROR;
-                       error  = PAGER_ERROR;
-               }
+               if (error = VOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
+                                       (size_t)size, p->p_ucred, flags))
+                       result = error = PAGER_ERROR;
                goto out;
        }
-       ubc_create_upl( vp,
-                                       f_offset,
-                                       isize,
-                                       &vpupl,
-                                       &pl,
-                                       UPL_COPYOUT_FROM);
+       ubc_create_upl(vp, f_offset, isize, &vpupl, &pl, UPL_COPYOUT_FROM);
        if (vpupl == (upl_t) 0)
                return PAGER_ABSENT;
 
-       vp_size = ubc_getsize(vp);
-       if (vp_size == 0) {
-
-               while (isize) {
+       if (ubc_getsize(vp) == 0) {
+               for (offset = 0; isize; isize -= PAGE_SIZE,
+                                       offset += PAGE_SIZE) {
                        blkno = ubc_offtoblk(vp, (off_t)f_offset);
-start0:
-                       if (bp = incore(vp, blkno)) {
-                               if (ISSET(bp->b_flags, B_BUSY)) {
-                                       SET(bp->b_flags, B_WANTED);
-                                       error = tsleep(bp, (PRIBIO + 1), "vnpgout", 0);
-                                       goto start0;
-                               } else {
-                                       bremfree(bp);
-                                       SET(bp->b_flags, (B_BUSY|B_INVAL));
-                               }
-                       }
-                       if (bp)
-                               brelse(bp);
                        f_offset += PAGE_SIZE;
-                       isize    -= PAGE_SIZE;
+                       if ((bp = incore(vp, blkno)) &&
+                           ISSET(bp->b_flags, B_BUSY)) {
+                               ubc_upl_abort_range(vpupl, offset, PAGE_SIZE,
+                                                   UPL_ABORT_FREE_ON_EMPTY);
+                               result = error = PAGER_ERROR;
+                               continue;
+                       } else if (bp) {
+                               bremfree(bp);
+                               SET(bp->b_flags, B_BUSY | B_INVAL);
+                               brelse(bp);
+                       }
+                       ubc_upl_commit_range(vpupl, offset, PAGE_SIZE,
+                                            UPL_COMMIT_FREE_ON_EMPTY);
                }
-               ubc_upl_commit_range(vpupl, 0, size, UPL_COMMIT_FREE_ON_EMPTY);
-
-               error = 0;
                goto out;
        }
        pg_index = 0;
@@ -176,8 +161,7 @@ start0:
 
                if ( !upl_valid_page(pl, pg_index)) {
                        ubc_upl_abort_range(vpupl, offset, PAGE_SIZE,
-                                       UPL_ABORT_FREE_ON_EMPTY);
-                
+                                           UPL_ABORT_FREE_ON_EMPTY);
                        offset += PAGE_SIZE;
                        isize  -= PAGE_SIZE;
                        pg_index++;
@@ -192,28 +176,32 @@ start0:
                         * We also get here from vm_object_terminate()
                         * So all you need to do in these
                         * cases is to invalidate incore buffer if it is there
+                        * Note we must not sleep here if B_BUSY - that is
+                        * a lock inversion which causes deadlock.
                         */
                        blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset));
                        s = splbio();
                        vp_pgoclean++;                  
-start:
-                       if (bp = incore(vp, blkno)) {
-                               if (ISSET(bp->b_flags, B_BUSY)) {
-                                       SET(bp->b_flags, B_WANTED);
-                                       error = tsleep(bp, (PRIBIO + 1), "vnpgout", 0);
-                                       goto start;
-                               } else {
-                                       bremfree(bp);
-                                       SET(bp->b_flags, (B_BUSY|B_INVAL));
-                               }
-                       }
-                       splx(s);
-                       if (bp)
+                       if ((bp = incore(vp, blkno)) &&
+                           ISSET(bp->b_flags, B_BUSY | B_NEEDCOMMIT)) {
+                               splx(s);
+                               ubc_upl_abort_range(vpupl, offset, PAGE_SIZE,
+                                                   UPL_ABORT_FREE_ON_EMPTY);
+                               result = error = PAGER_ERROR;
+                               offset += PAGE_SIZE;
+                               isize -= PAGE_SIZE;
+                               pg_index++;
+                               continue;
+                       } else if (bp) {
+                               bremfree(bp);
+                               SET(bp->b_flags, B_BUSY | B_INVAL );
+                               splx(s);
                                brelse(bp);
+                       } else
+                               splx(s);
 
                        ubc_upl_commit_range(vpupl, offset, PAGE_SIZE, 
-                                       UPL_COMMIT_FREE_ON_EMPTY);
-
+                                            UPL_COMMIT_FREE_ON_EMPTY);
                        offset += PAGE_SIZE;
                        isize  -= PAGE_SIZE;
                        pg_index++;
@@ -236,12 +224,10 @@ start:
                xsize = num_of_pages * PAGE_SIZE;
 
                /*  By defn callee will commit or abort upls */
-               if (error = VOP_PAGEOUT(vp, vpupl, (vm_offset_t) offset,
-                                       (off_t)(f_offset + offset),
-                                       xsize, p->p_ucred, flags & ~UPL_NOCOMMIT)) {
-                       result = PAGER_ERROR;
-                       error  = PAGER_ERROR;
-               }
+               if (error = VOP_PAGEOUT(vp, vpupl, (vm_offset_t)offset,
+                                       (off_t)(f_offset + offset), xsize,
+                                       p->p_ucred, flags & ~UPL_NOCOMMIT))
+                       result = error = PAGER_ERROR;
                offset += xsize;
                isize  -= xsize;
                pg_index += num_of_pages;
@@ -271,52 +257,42 @@ vnode_pagein(
        int             error = 0;
        int             xfer_size;
        boolean_t       funnel_state;
-       int haveupl=0;
        upl_t vpupl = NULL;
        off_t   local_offset;
        unsigned int  ioaddr;
 
        funnel_state = thread_funnel_set(kernel_flock, TRUE);
 
-#if 0
-       if(pl->page_list.npages >1 )
-               panic("vnode_pageout: Can't handle more than one page");
-#endif /* 0 */
-
-       if (pl != (upl_t)NULL) {
-               haveupl = 1;
-       }
        UBCINFOCHECK("vnode_pagein", vp);
 
        if (UBCINVALID(vp)) {
                result = PAGER_ERROR;
                error  = PAGER_ERROR;
+               if (pl && !(flags & UPL_NOCOMMIT)) {
+                       ubc_upl_abort(pl, 0);
+               }
                goto out;
        }
 
-       if (haveupl) {
+       if (pl) {
                dp_pgins++;
                if (error = VOP_PAGEIN(vp, pl, pl_offset, (off_t)f_offset,
-                                size,p->p_ucred, flags)) {
+                                      size, p->p_ucred, flags)) {
                        result = PAGER_ERROR;
                }
        } else {
 
                local_offset = 0;
                while (size) {
-                       if((size > 4096) && (vp->v_tag == VT_NFS)) {
+                       if(size > 4096 && vp->v_tag == VT_NFS) {
                                xfer_size =  4096;
                                size = size - xfer_size;
                        } else {
                                xfer_size = size;
                                size = 0;
                        }
-                       ubc_create_upl( vp,
-                                                       f_offset+local_offset,
-                                                       xfer_size,
-                                                       &vpupl,
-                                                       NULL,
-                                                       UPL_FLAGS_NONE);
+                       ubc_create_upl(vp, f_offset + local_offset, xfer_size,
+                                      &vpupl, NULL, UPL_FLAGS_NONE);
                        if (vpupl == (upl_t) 0) {
                                result =  PAGER_ABSENT;
                                error = PAGER_ABSENT;
@@ -327,7 +303,9 @@ vnode_pagein(
 
                        /*  By defn callee will commit or abort upls */
                        if (error = VOP_PAGEIN(vp, vpupl, (vm_offset_t) 0,
-                               (off_t)f_offset+local_offset, xfer_size,p->p_ucred, flags & ~UPL_NOCOMMIT)) {
+                                              (off_t)f_offset + local_offset,
+                                              xfer_size, p->p_ucred,
+                                              flags & ~UPL_NOCOMMIT)) {
                                result = PAGER_ERROR;
                                error  = PAGER_ERROR;
                        }
@@ -336,7 +314,7 @@ vnode_pagein(
        }
 out:
        if (errorp)
-           *errorp = result;
+               *errorp = result;
        thread_funnel_set(kernel_flock, funnel_state);
 
        return (error);
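The vnode_pager.c rewrite drops the haveupl/vp_size locals, aborts the caller's UPL when the vnode has no ubc info (unless UPL_NOCOMMIT was passed), and, most importantly, stops sleeping on busy in-core buffers during pageout; per the new comment, tsleeping there inverts lock order against the buffer's owner and can deadlock, so the affected page is aborted and PAGER_ERROR returned instead. The per-page policy, restated compactly (spl and bookkeeping details omitted; this is a simplification, not a copy of the new code):

        bp = incore(vp, blkno);
        if (bp && ISSET(bp->b_flags, B_BUSY | B_NEEDCOMMIT)) {
                /* never tsleep here: lock inversion against the buf owner */
                ubc_upl_abort_range(vpupl, offset, PAGE_SIZE,
                                    UPL_ABORT_FREE_ON_EMPTY);
                result = error = PAGER_ERROR;
        } else {
                if (bp) {
                        bremfree(bp);
                        SET(bp->b_flags, B_BUSY | B_INVAL);
                        brelse(bp);
                }
                ubc_upl_commit_range(vpupl, offset, PAGE_SIZE,
                                     UPL_COMMIT_FREE_ON_EMPTY);
        }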
index 7fa5e30679b0e966ec6b98a8a6c90035c3908d7f..ebc0a8b23f0484cbfab1af002ec9711da383de45 100644 (file)
@@ -813,8 +813,6 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon,
         case kIOMessageSystemWillSleep:
             rootDomain->powerOverrideOnPriv();         // start ignoring children's requests
                                                         // (fall through to other cases)
-        case kIOMessageSystemWillPowerOff:
-        case kIOMessageSystemWillRestart:
 
             // Interested applications have been notified of an impending power
             // change and have acked (when applicable).
@@ -841,6 +839,11 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon,
                 thread_call_enter1(rootDomain->diskSyncCalloutEntry, (thread_call_param_t)params->powerRef);
             ret = kIOReturnSuccess;
             break;
+
+        case kIOMessageSystemWillPowerOff:
+        case kIOMessageSystemWillRestart:
+            break;
+
         default:
             ret = kIOReturnUnsupported;
             break;
index de364b87cae06e7bdaaffb05b92486cd54d85f7f..3c75dceb2d8b693a8576a3250812443c7afec551 100644 (file)
@@ -616,6 +616,7 @@ bool IODTMapInterrupts( IORegistryEntry * regEntry )
             map = OSData::withData( local, mapped->getCount() * sizeof( UInt32),
                                sizeof( UInt32));
             controller = gIODTDefaultInterruptController;
+            controller->retain();
         }
 
         localBits += skip;
@@ -734,34 +735,41 @@ bool IODTMatchNubWithKeys( IORegistryEntry * regEntry,
 OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from,
                        IOOptionBits options, const char * keys )
 {
-    OSSet                                      *result;
+    OSSet                                      *result = 0;
     IORegistryEntry                    *next;
     IORegistryIterator         *iter;
     OSCollectionIterator       *cIter;
     bool                                       cmp;
     bool                                       minus = options & kIODTExclusive;
 
-    result = OSSet::withCapacity( 3 );
-    if( !result)
-               return( 0);
 
     iter = IORegistryIterator::iterateOver( from, gIODTPlane,
                (options & kIODTRecursive) ? kIORegistryIterateRecursively : 0 );
     if( iter) {
-        while( (next = iter->getNextObject())) {
+        do {
 
-            // Look for existence of a debug property to skip
-            if( next->getProperty("AAPL,ignore"))
-                               continue;
+            if( result)
+                result->release();
+            result = OSSet::withCapacity( 3 );
+            if( !result)
+                break;
 
-               if( keys) {
-                cmp = IODTMatchNubWithKeys( next, keys );
-                if( (minus && (false == cmp))
-                       || ((false == minus) && (false != cmp)) )
+            iter->reset();
+            while( (next = iter->getNextObject())) {
+    
+                // Look for existence of a debug property to skip
+                if( next->getProperty("AAPL,ignore"))
+                                    continue;
+    
+                    if( keys) {
+                    cmp = IODTMatchNubWithKeys( next, keys );
+                    if( (minus && (false == cmp))
+                            || ((false == minus) && (false != cmp)) )
+                        result->setObject( next);
+                    } else
                     result->setObject( next);
-               } else
-                result->setObject( next);
-        }
+            }
+        } while( !iter->isValid());
         iter->release();
     }
 
index e4bc666b2a3820a98d76f351e15d68a4744871b9..703dc94f47daa86189b228547a9b47e5402f2e5b 100644 (file)
@@ -280,8 +280,7 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties)
 IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer,
                              IOByteCount length)
 {
-  if ((_nvramImage == 0) || (_xpramPartitionOffset == 0))
-    return kIOReturnNotReady;
+  if (_xpramImage == 0) return kIOReturnUnsupported;
   
   if ((buffer == 0) || (length <= 0) || (offset < 0) ||
       (offset + length > kIODTNVRAMXPRAMSize))
@@ -295,8 +294,7 @@ IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer,
 IOReturn IODTNVRAM::writeXPRAM(IOByteCount offset, UInt8 *buffer,
                               IOByteCount length)
 {
-  if ((_nvramImage == 0) || (_xpramPartitionOffset == 0))
-    return kIOReturnNotReady;
+  if (_xpramImage == 0) return kIOReturnUnsupported;
   
   if ((buffer == 0) || (length <= 0) || (offset < 0) ||
       (offset + length > kIODTNVRAMXPRAMSize))
index 7faa1698647e73b42b7b7cfae25b2ba81a97f786..0f07f4e757f90745c4d30fb98f2030081d2fab41 100644 (file)
@@ -32,7 +32,7 @@
 #include <IOKit/IOKitDebug.h>
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/pwr_mgt/RootDomain.h>
-
+#include <IOKit/IOMessage.h>
 #include <libkern/c++/OSContainers.h>
 
 
@@ -64,6 +64,8 @@ OSMetaClassDefineReservedUnused(IOPlatformExpert, 10);
 OSMetaClassDefineReservedUnused(IOPlatformExpert, 11);
 
 static IOPlatformExpert * gIOPlatform;
+static OSDictionary * gIOInterruptControllers;
+static IOLock * gIOInterruptControllersLock;
 
 OSSymbol * gPlatformInterruptControllerName;
 
@@ -86,6 +88,9 @@ bool IOPlatformExpert::start( IOService * provider )
     if (!super::start(provider))
       return false;
     
+    gIOInterruptControllers = OSDictionary::withCapacity(1);
+    gIOInterruptControllersLock = IOLockAlloc();
+    
     // Correct the bus frequency in the device tree.
     busFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.bus_clock_rate_hz, 4);
     provider->setProperty("clock-frequency", busFrequency);
@@ -255,21 +260,36 @@ IOReturn IOPlatformExpert::setConsoleInfo( PE_Video * consoleInfo,
 
 IOReturn IOPlatformExpert::registerInterruptController(OSSymbol *name, IOInterruptController *interruptController)
 {
-  publishResource(name, interruptController);
+  IOLockLock(gIOInterruptControllersLock);
+  
+  gIOInterruptControllers->setObject(name, interruptController);
+  
+  thread_wakeup(gIOInterruptControllers);
+  
+  IOLockUnlock(gIOInterruptControllersLock);
   
   return kIOReturnSuccess;
 }
 
 IOInterruptController *IOPlatformExpert::lookUpInterruptController(OSSymbol *name)
 {
-  IOInterruptController *interruptController;
-  IOService             *service;
-  
-  service = waitForService(resourceMatching(name));
+  OSObject              *object;
   
-  interruptController = OSDynamicCast(IOInterruptController, service->getProperty(name));  
+  while (1) {
+    IOLockLock(gIOInterruptControllersLock);
+    
+    object = gIOInterruptControllers->getObject(name);
+    
+    if (object == 0) assert_wait(gIOInterruptControllers, THREAD_UNINT);
+    
+    IOLockUnlock(gIOInterruptControllersLock);
+    
+    if (object != 0) break;
+    
+    thread_block(0);
+  }
   
-  return interruptController;
+  return OSDynamicCast(IOInterruptController, object);
 }
 
 
@@ -628,6 +648,18 @@ static void getCStringForObject (OSObject * inObj, char * outStr)
    }
 }
 
+/* IOPMPanicOnShutdownHang
+ * - Called from a timer installed by PEHaltRestart
+ */
+static void IOPMPanicOnShutdownHang(thread_call_param_t p0, thread_call_param_t p1)
+{
+    int type = (int)p0;
+
+    /* 30 seconds has elapsed - resume shutdown */
+    gIOPlatform->haltRestart(type);
+}
+
+
 extern "C" {
 
 /*
@@ -660,6 +692,35 @@ int PEGetPlatformEpoch(void)
 
 int PEHaltRestart(unsigned int type)
 {
+  IOPMrootDomain    *pmRootDomain = IOService::getPMRootDomain();
+  bool              noWaitForResponses;
+  AbsoluteTime      deadline;
+  thread_call_t     shutdown_hang;
+  
+  /* Notify IOKit PM clients of shutdown/restart
+     Clients subscribe to this message with a call to
+     IOService::registerInterest()
+  */
+  
+  /* Spawn a thread that will panic in 30 seconds. 
+     If all goes well the machine will be off by the time
+     the timer expires.
+   */
+  shutdown_hang = thread_call_allocate( &IOPMPanicOnShutdownHang, (thread_call_param_t) type);
+  clock_interval_to_deadline( 30, kSecondScale, &deadline );
+  thread_call_enter1_delayed( shutdown_hang, 0, deadline );
+  
+  noWaitForResponses = pmRootDomain->tellChangeDown2(type); 
+  /* This notification should have few clients who all do 
+     their work synchronously.
+           
+     In this "shutdown notification" context we don't give
+     drivers the option of working asynchronously and responding 
+     later. PM internals make it very hard to wait for asynchronous
+     replies. In fact, it's a bad idea to even be calling
+     tellChangeDown2 from here at all.
+   */ 
+
   if (gIOPlatform) return gIOPlatform->haltRestart(type);
   else return -1;
 }
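PEHaltRestart() now tells interested PM clients about the impending shutdown or restart via tellChangeDown2(), and arms a 30-second delayed thread call beforehand so a client that never responds cannot stall power-off: when the timer fires, IOPMPanicOnShutdownHang() re-enters haltRestart() and the machine goes down anyway. The watchdog pattern in isolation, with illustrative names (the real code passes the halt/restart type as the thread-call parameter):

        static void
        watchdog_fired(thread_call_param_t p0, thread_call_param_t p1)
        {
                /* deadline passed: force the shutdown to proceed */
        }

        static void
        arm_shutdown_watchdog(void)
        {
                AbsoluteTime    deadline;
                thread_call_t   call;

                call = thread_call_allocate(&watchdog_fired,
                                            (thread_call_param_t)0);
                clock_interval_to_deadline(30, kSecondScale, &deadline);
                thread_call_enter1_delayed(call, (thread_call_param_t)0,
                                           deadline);
        }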
index e309047d4c67e4223ac06641b082b09883c396e8..4040b3cd0d25948d74c4861d75ce1eca0a2fec05 100644 (file)
@@ -440,12 +440,15 @@ bool IORegistryEntry::init( IORegistryEntry * old,
     WLOCK;
 
     fPropertyTable = old->getPropertyTable();
-    old->fPropertyTable = 0;
+    fPropertyTable->retain();
 #ifdef IOREGSPLITTABLES
     fRegistryTable = old->fRegistryTable;
-    old->fRegistryTable = 0;
+    old->fRegistryTable = OSDictionary::withDictionary( fRegistryTable );
 #endif /* IOREGSPLITTABLES */
 
+    old->registryTable()->removeObject( plane->keys[ kParentSetIndex ] );
+    old->registryTable()->removeObject( plane->keys[ kChildSetIndex ] );
+
     all = getParentSetReference( plane );
     if( all) for( index = 0;
               (next = (IORegistryEntry *) all->getObject(index));
@@ -1815,6 +1818,8 @@ unsigned int IORegistryEntry::getDepth( const IORegistryPlane * plane ) const
 
 OSDefineMetaClassAndStructors(IORegistryIterator, OSIterator)
 
+enum { kIORegistryIteratorInvalidFlag = 0x80000000 };
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 IORegistryIterator *
@@ -1838,7 +1843,7 @@ IORegistryIterator::iterateOver( IORegistryEntry * root,
             create->where = &create->start;
             create->start.current = root;
             create->plane = plane;
-            create->options = options;
+            create->options = options & ~kIORegistryIteratorInvalidFlag;
 
        } else {
            create->release();
@@ -1860,10 +1865,12 @@ bool IORegistryIterator::isValid( void )
     bool               ok;
     IORegCursor *      next;
 
-    ok = true;
     next = where;
 
     RLOCK;
+
+    ok = (0 == (kIORegistryIteratorInvalidFlag & options));
+
     while( ok && next) {
        if( where->iter)
             ok = where->iter->isValid();
@@ -1927,6 +1934,7 @@ void IORegistryIterator::reset( void )
     }
 
     where->current = root;
+    options &= ~kIORegistryIteratorInvalidFlag;
 }
 
 void IORegistryIterator::free( void )
@@ -1962,11 +1970,15 @@ IORegistryEntry * IORegistryIterator::getNextObjectFlat( void )
         if( where->current)
             where->current->release();
 
-    if( where->iter)
+    if( where->iter) {
+
         next = (IORegistryEntry *) where->iter->getNextObject();
 
-    if( next)
-       next->retain();
+        if( next)
+            next->retain();
+        else if( !where->iter->isValid())
+            options |= kIORegistryIteratorInvalidFlag;
+    }
 
     where->current = next;
 
index e84f9d9c041e511f1c7d0221357ed9fb10599de0..4bd1a2afa5dbb3b25da6a4202c680066423b3ecb 100644 (file)
@@ -4138,20 +4138,14 @@ void IOService::setDeviceMemory( OSArray * array )
 IOReturn IOService::resolveInterrupt(IOService *nub, int source)
 {
   IOInterruptController *interruptController;
-  OSDictionary          *propTable;
   OSArray               *array;
   OSData                *data;
   OSSymbol              *interruptControllerName;
   long                  numSources;
   IOInterruptSource     *interruptSources;
   
-  // Get the property table from the nub.
-  propTable = nub->getPropertyTable();
-  if (propTable == 0) return kIOReturnNoResources;
-
-  // Get the parents list from the property table.
-  array = OSDynamicCast(OSArray,
-                       propTable->getObject(gIOInterruptControllersKey));
+  // Get the parents list from the nub.
+  array = OSDynamicCast(OSArray, nub->getProperty(gIOInterruptControllersKey));
   if (array == 0) return kIOReturnNoResources;
   
   // Allocate space for the IOInterruptSources if needed... then return early.
@@ -4173,9 +4167,8 @@ IOReturn IOService::resolveInterrupt(IOService *nub, int source)
   interruptController = getPlatform()->lookUpInterruptController(interruptControllerName);
   if (interruptController == 0) return kIOReturnNoResources;
   
-  // Get the interrupt numbers from the property table.
-  array = OSDynamicCast(OSArray,
-                       propTable->getObject(gIOInterruptSpecifiersKey));
+  // Get the interrupt numbers from the nub.
+  array = OSDynamicCast(OSArray, nub->getProperty(gIOInterruptSpecifiersKey));
   if (array == 0) return kIOReturnNoResources;
   data = OSDynamicCast(OSData, array->getObject(source));
   if (data == 0) return kIOReturnNoResources;
index deff7b74e936817fa5d07789a0b0df787e2d3156..33c8a75b9b3f9ef23608c22ccbca179eab28e704 100644 (file)
  */
 const char * gIOKernelKmods =
 "{
-    'com.apple.kernel'                         = '5.2';
-    'com.apple.kernel.bsd'                     = '5.2';
-    'com.apple.kernel.iokit'                   = '5.2';
-    'com.apple.kernel.libkern'                 = '5.2';
-    'com.apple.kernel.mach'                    = '5.2';
+    'com.apple.kernel'                         = '5.3';
+    'com.apple.kernel.bsd'                     = '5.3';
+    'com.apple.kernel.iokit'                   = '5.3';
+    'com.apple.kernel.libkern'                 = '5.3';
+    'com.apple.kernel.mach'                    = '5.3';
     'com.apple.iokit.IOADBFamily'              = '1.1';
     'com.apple.iokit.IOSystemManagementFamily' = '1.1';
 }";
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8b137891791fe96927ad78e64b0aad7bded08bdc 100644 (file)
@@ -0,0 +1 @@
+
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index 39481085db98673826604017716b0d9e5a14a64d..a2aeb032d7c9803edf0b1090a3140e568dd668bd 100644 (file)
@@ -55,8 +55,6 @@ int OSCompareAndSwap( UInt32 oldVal, UInt32 newVal, UInt32 * addr )
 
 
        ENTRY   _OSCompareAndSwap
-
-       lwarx   r6,     0,r5            /* CEMV10 */
 .L_CASretry:
        lwarx   r6,     0,r5
        cmpw    r6,     r3
@@ -94,14 +92,9 @@ SInt32       OSAddAtomic(SInt32 amount, SInt32 * value)
        ENTRY   _OSAddAtomic
 
        mr      r5,r3                           /* Save the increment */
-       lwarx   r3,0,r4                         /* CEMV10 */
-
 .L_AAretry:
        lwarx   r3, 0, r4                       /* Grab the area value */
        add     r6, r3, r5                      /* Add the value */
        stwcx.  r6, 0, r4                       /* Try to save the new value */
        bne-    .L_AAretry                      /* Didn't get it, try again... */
        blr                                     /* Return the original value */
-
-
-
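Both atomic routines had a stray lwarx sitting above the retry label (tagged CEMV10); the reservation it took was immediately superseded by the lwarx inside the loop, so it was dead work at best and left the loop entry inconsistent with the retry path. A C-level sketch of the loop shape the fixed assembly implements; load_reserved() and store_conditional() are hypothetical stand-ins for the lwarx/stwcx. pair, not real kernel functions:

        SInt32
        OSAddAtomic_sketch(SInt32 amount, SInt32 *value)
        {
                SInt32 old;

                do {
                        old = load_reserved(value);     /* lwarx */
                } while (!store_conditional(value, old + amount));  /* stwcx. */
                return (old);           /* OSAddAtomic returns the old value */
        }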
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3
index effec727ca954115c2b4ea9fdb2c828db116ebe4..34f6010593232840ce48d14ee47666fe6edec533 100644 (file)
@@ -3499,9 +3499,6 @@ vs_cluster_transfer(
                        if (error == KERN_SUCCESS) {
                                error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, 
                                                        size, &residual, 0);
-                               if(error)
-                                       upl_commit(upl, NULL);
-                                       upl_deallocate(upl);
                        }
                                        
 #else
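The block removed from vs_cluster_transfer() was a missing-braces bug: the indentation implies both calls are guarded by if (error), but only upl_commit() was, so upl_deallocate() ran on every pass through this path. The commit deletes both lines outright rather than adding braces, presumably because the UPL's cleanup is handled elsewhere (that much is inference). The hazard, spelled out:

        /* what the removed code actually did */
        if (error)
                upl_commit(upl, NULL);
        upl_deallocate(upl);            /* executed unconditionally */

        /* what its layout suggested was intended */
        if (error) {
                upl_commit(upl, NULL);
                upl_deallocate(upl);
        }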
index 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f..00750edc07d6415dcc07ae0351e9397b0222b7ba 100644 (file)
@@ -1 +1 @@
-2
+3