xnu-517.tar.gz

[apple/xnu.git] / bsd / hfs / hfs_vnops.c
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c

index 7412131ca71a2a55a59637b7038ac9a5027da6bc..1c6444b2df4f50e15e36de1327c2d703c94ef309 100644 (file)
--- a/bsd/hfs/hfs_vnops.c
+++ b/bsd/hfs/hfs_vnops.c
@@ -1,21 +1,24 @@
  /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
   *
   * @APPLE_LICENSE_HEADER_START@
   * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
   * 
   * @APPLE_LICENSE_HEADER_END@
   */
@@ -32,6 +35,8 @@
  #include <sys/namei.h>
  #include <sys/ubc.h>
  #include <sys/quota.h>
+#include <sys/time.h>
+#include <sys/disk.h>
  
  #include <miscfs/specfs/specdev.h>
  #include <miscfs/fifofs/fifo.h>
@@ -55,12 +60,16 @@
  #define MAKE_DELETED_NAME(NAME,FID) \
             (void) sprintf((NAME), "%s%d", HFS_DELETE_PREFIX, (FID))
  
+#define KNDETACH_VNLOCKED 0x00000001
+
+#define CARBON_TEMP_DIR_NAME   "Cleanup At Startup"
  
-extern uid_t console_user;
  
  /* Global vfs data structures for hfs */
  
  
+extern unsigned long strtoul(const char *, char **, int);
+
  extern int groupmember(gid_t gid, struct ucred *cred);
  
  static int hfs_makenode(int mode, struct vnode *dvp, struct vnode **vpp,
@@ -71,6 +80,19 @@ static int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
  
  static int hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p);
  
+static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
+                         int);
+
+static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
+                          int);
+
+/* Options for hfs_removedir and hfs_removefile */
+#define HFSRM_PARENT_LOCKED    0x01
+#define HFSRM_SKIP_RESERVE     0x02
+#define HFSRM_SAVE_NAME        0x04
+#define HFSRM_RENAMEOPTS       0x07
+
+
  int hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags);
  
  int hfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
@@ -195,6 +217,8 @@ hfs_open(ap)
         } */ *ap;
  {
         struct vnode *vp = ap->a_vp;
+       struct filefork *fp = VTOF(vp);
+       struct timeval tv;
  
         /*
          * Files marked append-only must be opened for appending.
@@ -203,6 +227,36 @@ hfs_open(ap)
             (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
                 return (EPERM);
  
+       if (ap->a_mode & O_EVTONLY) {
+               if (vp->v_type == VREG) {
+                       ++VTOF(vp)->ff_evtonly_refs;
+               } else {
+                       ++VTOC(vp)->c_evtonly_refs;
+               };
+       };
+
+       /*
+        * On the first (non-busy) open of a fragmented
+        * file attempt to de-frag it (if its less than 20MB).
+        */
+       if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) ||
+           !UBCISVALID(vp) || ubc_isinuse(vp, 1)) {
+               return (0);
+       }
+       fp = VTOF(vp);
+       if (fp->ff_blocks &&
+           fp->ff_extents[7].blockCount != 0 &&
+           fp->ff_size <= (20 * 1024 * 1024)) {
+               /* 
+                * Wait until system bootup is done (3 min).
+                */
+               microuptime(&tv);
+               if (tv.tv_sec < (60 * 3)) {
+                       return (0);
+               }
+               (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, ap->a_cred, ap->a_p);
+       }
+
         return (0);
  }
  
@@ -247,6 +301,14 @@ hfs_close(ap)
         }
         simple_unlock(&vp->v_interlock);
  
+       if (ap->a_fflag & O_EVTONLY) {
+               if (vp->v_type == VREG) {
+                       --VTOF(vp)->ff_evtonly_refs;
+               } else {
+                       --VTOC(vp)->c_evtonly_refs;
+               };
+       };
+
         /*
          * VOP_CLOSE can be called with vp locked (from vclean).
          * We check for this case using VOP_ISLOCKED and bail.
@@ -258,7 +320,9 @@ hfs_close(ap)
  
         leof = fp->ff_size;
         
-       if ((fp->ff_blocks > 0) && !ISSET(cp->c_flag, C_DELETED)) {
+       if ((fp->ff_blocks > 0) &&
+           !ISSET(cp->c_flag, C_DELETED) &&
+           ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) == 0)) {
                 enum vtype our_type = vp->v_type;
                 u_long our_id = vp->v_id;
                 int was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA);
@@ -331,6 +395,8 @@ hfs_close(ap)
                 }
                 VOP_UNLOCK(vp, 0, p);
         }
+       if ((vp->v_flag & VSYSTEM) && (vp->v_usecount == 1))
+               vgone(vp);
         return (0);
  }
  
@@ -373,7 +439,7 @@ hfs_access(ap)
                 case VDIR:
                 case VLNK:
                 case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+                       if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                                 return (EROFS);
  #if QUOTA
                         if ((error = hfs_getinoquota(cp)))
@@ -381,20 +447,20 @@ hfs_access(ap)
  #endif /* QUOTA */
                         break;
                 }
+               /* If immutable bit set, nobody gets to write it. */
+               if (cp->c_flags & IMMUTABLE)
+                       return (EPERM);
         }
  
-       /* If immutable bit set, nobody gets to write it. */
-       if ((mode & VWRITE) && (cp->c_flags & IMMUTABLE))
-               return (EPERM);
  
         /* Otherwise, user id 0 always gets access. */
-       if (ap->a_cred->cr_uid == 0)
+       if (cred->cr_uid == 0)
                 return (0);
  
         mask = 0;
  
         /* Otherwise, check the owner. */
-       if (hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, ap->a_p, false) == 0) {
+       if ( (cp->c_uid == cred->cr_uid) || (cp->c_uid == UNKNOWNUID) ) {
                 if (mode & VEXEC)
                         mask |= S_IXUSR;
                 if (mode & VREAD)
@@ -461,6 +527,8 @@ hfs_getattr(ap)
         CTIMES(cp, &tv, &tv);
  
         vap->va_type = vp->v_type;
+       vap->va_mode = cp->c_mode;
+       vap->va_nlink = cp->c_nlink;
         /*
          * [2856576]  Since we are dynamically changing the owner, also
          * effectively turn off the set-user-id and set-group-id bits,
@@ -468,9 +536,12 @@ hfs_getattr(ap)
          * a security hole where set-user-id programs run as whoever is
          * logged on (or root if nobody is logged in yet!)
          */
-       vap->va_mode = (cp->c_uid == UNKNOWNUID) ? cp->c_mode & ~(S_ISUID | S_ISGID) : cp->c_mode;
-       vap->va_nlink = cp->c_nlink;
-       vap->va_uid = (cp->c_uid == UNKNOWNUID) ? console_user : cp->c_uid;
+       if (cp->c_uid == UNKNOWNUID) {
+               vap->va_mode &= ~(S_ISUID | S_ISGID);
+               vap->va_uid = ap->a_cred->cr_uid;
+       } else {
+               vap->va_uid = cp->c_uid;
+       }
         vap->va_gid = cp->c_gid;
         vap->va_fsid = cp->c_dev;
         /*
@@ -497,7 +568,6 @@ hfs_getattr(ap)
         vap->va_rdev = 0;
         vap->va_blocksize = VTOVFS(vp)->mnt_stat.f_iosize;
         vap->va_filerev = 0;
-       vap->va_spare = 0;
         if (vp->v_type == VDIR) {
                 vap->va_size = cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE;
                 vap->va_bytes = 0;
@@ -550,8 +620,19 @@ hfs_setattr(ap)
                 return (EINVAL);
         }
  
+       // XXXdbg
+       // don't allow people to set the attributes of symlinks
+       // (nfs has a bad habit of doing ths and it can cause
+       // problems for journaling).
+       //
+       if (vp->v_type == VLNK) {
+               return 0;
+       }
+
+
+
         if (vap->va_flags != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
                 if ((error = hfs_chflags(vp, vap->va_flags, cred, p)))
                         return (error);
@@ -566,7 +647,7 @@ hfs_setattr(ap)
         if (VTOHFS(vp)->jnl && cp->c_datafork) {
                 struct HFSPlusExtentDescriptor *extd;
  
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                 if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
                         return EPERM;
                 }
@@ -576,7 +657,7 @@ hfs_setattr(ap)
          * Go through the fields and update iff not VNOVAL.
          */
         if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
                 if ((error = hfs_chown(vp, vap->va_uid, vap->va_gid, cred, p)))
                         return (error);
@@ -592,7 +673,7 @@ hfs_setattr(ap)
                         return (EISDIR);
                 case VLNK:
                 case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+                       if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                                 return (EROFS);
                         break;
                 default:
@@ -603,7 +684,7 @@ hfs_setattr(ap)
         }
         cp = VTOC(vp);
         if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
                 if (((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) &&
                     ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
@@ -635,10 +716,11 @@ hfs_setattr(ap)
         }
         error = 0;
         if (vap->va_mode != (mode_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
                 error = hfs_chmod(vp, (int)vap->va_mode, cred, p);
         }
+       HFS_KNOTE(vp, NOTE_ATTRIB);
         return (error);
  }
  
@@ -647,6 +729,7 @@ hfs_setattr(ap)
   * Change the mode on a file.
   * cnode must be locked before calling.
   */
+__private_extern__
  int
  hfs_chmod(vp, mode, cred, p)
         register struct vnode *vp;
@@ -664,7 +747,7 @@ hfs_chmod(vp, mode, cred, p)
         if (VTOHFS(vp)->jnl && cp && cp->c_datafork) {
                 struct HFSPlusExtentDescriptor *extd;
  
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                 if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
                         return EPERM;
                 }
@@ -690,6 +773,7 @@ hfs_chmod(vp, mode, cred, p)
  }
  
  
+__private_extern__
  int
  hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags)
  {
@@ -707,9 +791,9 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
         case VDIR:
         case VLNK:
         case VREG:
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
-        break;
+               break;
         default:
                 break;
         }
@@ -742,6 +826,7 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
   * Change the flags on a file or directory.
   * cnode must be locked before calling.
   */
+__private_extern__
  int
  hfs_chflags(vp, flags, cred, p)
         register struct vnode *vp;
@@ -784,6 +869,7 @@ hfs_chflags(vp, flags, cred, p)
   * Perform chown operation on cnode cp;
   * code must be locked prior to call.
   */
+__private_extern__
  int
  hfs_chown(vp, uid, gid, cred, p)
         register struct vnode *vp;
@@ -929,14 +1015,13 @@ hfs_exchange(ap)
  {
         struct vnode *from_vp = ap->a_fvp;
         struct vnode *to_vp = ap->a_tvp;
-       struct vnode *from_rvp = NULL;
-       struct vnode *to_rvp = NULL;
         struct cnode *from_cp = VTOC(from_vp);
         struct cnode *to_cp = VTOC(to_vp);
         struct hfsmount *hfsmp = VTOHFS(from_vp);
         struct cat_desc tempdesc;
         struct cat_attr tempattr;
         int error = 0, started_tr = 0, grabbed_lock = 0;
+       cat_cookie_t cookie = {0};
  
         /* The files must be on the same volume. */
         if (from_vp->v_mount != to_vp->v_mount)
@@ -953,45 +1038,20 @@ hfs_exchange(ap)
                 struct HFSPlusExtentDescriptor *extd;
  
                 if (from_cp->c_datafork) {
-                       extd = &from_cp->c_datafork->ff_data.cf_extents[0];
+                       extd = &from_cp->c_datafork->ff_extents[0];
                         if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                                 return EPERM;
                         }
                 }
  
                 if (to_cp->c_datafork) {
-                       extd = &to_cp->c_datafork->ff_data.cf_extents[0];
+                       extd = &to_cp->c_datafork->ff_extents[0];
                         if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                                 return EPERM;
                         }
                 }
         }
  
-       from_rvp = from_cp->c_rsrc_vp;
-       to_rvp = to_cp->c_rsrc_vp;
-
-       /* If one of the resource forks is open then get the other one. */
-       if (from_rvp || to_rvp) {
-               error = hfs_vgetrsrc(hfsmp, from_vp, &from_rvp, ap->a_p);
-               if (error)
-                       return (error);
-               error = hfs_vgetrsrc(hfsmp, to_vp, &to_rvp, ap->a_p);
-               if (error) {
-                       vrele(from_rvp);
-                       return (error);
-               } 
-       }
-
-       /* Ignore any errors, we are doing a 'best effort' on flushing */
-       if (from_vp)
-               (void) vinvalbuf(from_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_vp)
-               (void) vinvalbuf(to_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (from_rvp)
-               (void) vinvalbuf(from_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_rvp)
-               (void) vinvalbuf(to_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-
         // XXXdbg
         hfs_global_shared_lock_acquire(hfsmp);
         grabbed_lock = 1;
@@ -1002,6 +1062,13 @@ hfs_exchange(ap)
                 started_tr = 1;
         }
         
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, ap->a_p))) {
+               goto Err_Exit;
+       }
+
         /* Lock catalog b-tree */
         error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, ap->a_p);
         if (error) goto Err_Exit;
@@ -1076,11 +1143,27 @@ hfs_exchange(ap)
  
         hfs_chashinsert(from_cp);
         hfs_chashinsert(to_cp);
+
+       /*
+        * When a file moves out of "Cleanup At Startup"
+        * we can drop its NODUMP status.
+        */
+       if ((from_cp->c_flags & UF_NODUMP) &&
+           (from_cp->c_parentcnid != to_cp->c_parentcnid)) {
+               from_cp->c_flags &= ~UF_NODUMP;
+               from_cp->c_flag |= C_CHANGE;
+       }
+       if ((to_cp->c_flags & UF_NODUMP) &&
+           (to_cp->c_parentcnid != from_cp->c_parentcnid)) {
+               to_cp->c_flags &= ~UF_NODUMP;
+               to_cp->c_flag |= C_CHANGE;
+       }
+
+       HFS_KNOTE(from_vp, NOTE_ATTRIB);
+       HFS_KNOTE(to_vp, NOTE_ATTRIB);
+
  Err_Exit:
-       if (to_rvp)
-               vrele(to_rvp);
-       if (from_rvp)
-               vrele(from_rvp);
+       cat_postflight(hfsmp, &cookie, ap->a_p);
  
         // XXXdbg
         if (started_tr) {
@@ -1139,12 +1222,7 @@ hfs_fsync(ap)
         if (vp->v_flag & VSYSTEM) {
             if (VTOF(vp)->fcbBTCBPtr != NULL) {
                         // XXXdbg
-                       if (hfsmp->jnl) {
-                               if (BTIsDirty(VTOF(vp))) {
-                                       panic("hfs: system file vp 0x%x has dirty blocks (jnl 0x%x)\n",
-                                                 vp, hfsmp->jnl);
-                               }
-                       } else {
+                       if (hfsmp->jnl == NULL) {
                                 BTFlushPath(VTOF(vp));
                         }
             }
@@ -1155,8 +1233,11 @@ hfs_fsync(ap)
          * When MNT_WAIT is requested and the zero fill timeout
          * has expired then we must explicitly zero out any areas
          * that are currently marked invalid (holes).
+        *
+        * Files with NODUMP can bypass zero filling here.
          */
         if ((wait || (cp->c_flag & C_ZFWANTSYNC)) &&
+           ((cp->c_flags & UF_NODUMP) == 0) &&
             UBCINFOEXISTS(vp) && (fp = VTOF(vp)) &&
             cp->c_zftimeout != 0) {
                 int devblksize;
@@ -1286,6 +1367,18 @@ metasync:
                     !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) {
                         hfs_metasync(VTOHFS(vp), cp->c_hint, ap->a_p);
                 }
+
+               // make sure that we've really been called from the user
+               // fsync() and if so push out any pending transactions 
+               // that this file might is a part of (and get them on
+               // stable storage).
+               if (vp->v_flag & VFULLFSYNC) {
+                   if (hfsmp->jnl) {
+                       journal_flush(hfsmp->jnl);
+                   } else {
+                       VOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NOCRED, ap->a_p);
+                   }
+               }
         }
  
         return (retval);
@@ -1418,13 +1511,25 @@ hfs_rmdir(ap)
                 struct componentname *a_cnp;
         } */ *ap;
  {
-       struct vnode *vp = ap->a_vp;
-       struct vnode *dvp = ap->a_dvp;
-       struct proc *p = ap->a_cnp->cn_proc;
+       return (hfs_removedir(ap->a_dvp, ap->a_vp, ap->a_cnp, 0));
+}
+
+/*
+ * hfs_removedir
+ */
+static int
+hfs_removedir(dvp, vp, cnp, options)
+       struct vnode *dvp;
+       struct vnode *vp;
+       struct componentname *cnp;
+       int options;
+{
+       struct proc *p = cnp->cn_proc;
         struct cnode *cp;
         struct cnode *dcp;
         struct hfsmount * hfsmp;
         struct timeval tv;
+       cat_cookie_t cookie = {0};
         int error = 0, started_tr = 0, grabbed_lock = 0;
  
         cp = VTOC(vp);
@@ -1440,7 +1545,6 @@ hfs_rmdir(ap)
  #if QUOTA
         (void)hfs_getinoquota(cp);
  #endif
-
         // XXXdbg
         hfs_global_shared_lock_acquire(hfsmp);
         grabbed_lock = 1;
@@ -1451,6 +1555,15 @@ hfs_rmdir(ap)
                 started_tr = 1;
         }
  
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+                       goto out;
+               }
+       }
+
         /*
          * Verify the directory is empty (and valid).
          * (Rmdir ".." won't be valid since
@@ -1495,16 +1608,22 @@ hfs_rmdir(ap)
         dcp->c_flag |= C_CHANGE | C_UPDATE;
         tv = time;
         (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
  
         hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID));
  
         cp->c_mode = 0;  /* Makes the vnode go away...see inactive */
         cp->c_flag |= C_NOEXISTS;
  out:
-       if (dvp) 
+       if (!(options & HFSRM_PARENT_LOCKED)) {
                 vput(dvp);
+       }
+       HFS_KNOTE(vp, NOTE_DELETE);
         vput(vp);
  
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               cat_postflight(hfsmp, &cookie, p);
+       }
         // XXXdbg
         if (started_tr) { 
             journal_end_transaction(hfsmp->jnl);
@@ -1536,23 +1655,42 @@ hfs_remove(ap)
                 struct componentname *a_cnp;
         } */ *ap;
  {
-       struct vnode *vp = ap->a_vp;
-       struct vnode *dvp = ap->a_dvp;
+       return (hfs_removefile(ap->a_dvp, ap->a_vp, ap->a_cnp, 0));
+}
+
+
+
+/*
+ * hfs_removefile
+ *
+ * Similar to hfs_remove except there are additional options.
+ */
+static int
+hfs_removefile(dvp, vp, cnp, options)
+       struct vnode *dvp;
+       struct vnode *vp;
+       struct componentname *cnp;
+       int options;
+{
         struct vnode *rvp = NULL;
         struct cnode *cp;
         struct cnode *dcp;
         struct hfsmount *hfsmp;
-       struct proc *p = current_proc();
+       struct proc *p = cnp->cn_proc;
         int dataforkbusy = 0;
         int rsrcforkbusy = 0;
         int truncated = 0;
         struct timeval tv;
+       cat_cookie_t cookie = {0};
         int error = 0;
         int started_tr = 0, grabbed_lock = 0;
+       int refcount, isbigfile = 0;
  
-       /* Redirect directories to rmdir */
-       if (vp->v_type == VDIR)
-               return (hfs_rmdir(ap));
+       /* Directories should call hfs_rmdir! */
+       if (vp->v_type == VDIR) {
+               error = EISDIR;
+               goto out;
+       }
  
         cp = VTOC(vp);
         dcp = VTOC(dvp);
@@ -1585,7 +1723,7 @@ hfs_remove(ap)
         if (hfsmp->jnl && cp->c_datafork) {
                 struct HFSPlusExtentDescriptor *extd;
  
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                 if (extd->startBlock == HFSTOVCB(hfsmp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                         error = EPERM;
                         goto out;
@@ -1599,18 +1737,27 @@ hfs_remove(ap)
          * vnode (vp).  And we took a ref on the resource vnode (rvp).
          * Hence set 1 in the tookref parameter of ubc_isinuse().
          */
-       if (UBCISVALID(vp) && ubc_isinuse(vp, 1))
+       if (VTOC(vp)->c_flag & C_VPREFHELD) {
+           refcount = 2;
+       } else {
+           refcount = 1;
+       }
+       if (UBCISVALID(vp) && ubc_isinuse(vp, refcount))
                 dataforkbusy = 1;
         if (rvp && UBCISVALID(rvp) && ubc_isinuse(rvp, 1))
                 rsrcforkbusy = 1;
  
+       // need this to check if we have to break the deletion 
+       // into multiple pieces
+       isbigfile = (VTOC(vp)->c_datafork->ff_size >= HFS_BIGFILE_SIZE);
+
         /*
          * Carbon semantics prohibit deleting busy files.
          * (enforced when NODELETEBUSY is requested)
          */
         if ((dataforkbusy || rsrcforkbusy) &&
-           ((ap->a_cnp->cn_flags & NODELETEBUSY) ||
-            (hfsmp->hfs_private_metadata_dir == 0))) {
+           ((cnp->cn_flags & NODELETEBUSY) ||
+            (hfsmp->hfs_privdir_desc.cd_cnid == 0))) {
                 error = EBUSY;
                 goto out;
         }
@@ -1629,6 +1776,15 @@ hfs_remove(ap)
             started_tr = 1;
         }
  
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+                       goto out;
+               }
+       }
+
         /* Remove our entry from the namei cache. */
         cache_purge(vp);
  
@@ -1670,7 +1826,7 @@ hfs_remove(ap)
         if ((cp->c_flag & C_HARDLINK) == 0) {
                 int mode = cp->c_mode;
  
-               if (!dataforkbusy && cp->c_datafork->ff_blocks != 0) {
+               if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) {
                         cp->c_mode = 0;  /* Suppress VOP_UPDATES */
                         error = VOP_TRUNCATE(vp, (off_t)0, IO_NDELAY, NOCRED, p);
                         cp->c_mode = mode;
@@ -1682,15 +1838,8 @@ hfs_remove(ap)
                         cp->c_mode = 0;            /* Suppress VOP_UPDATES */
                         error = VOP_TRUNCATE(rvp, (off_t)0, IO_NDELAY, NOCRED, p);
                         cp->c_mode = mode;
-                       if (error && !dataforkbusy)
+                       if (error)
                                 goto out;
-                       else {
-                               /*
-                                * XXX could also force an update on vp
-                                * and fail the remove.
-                                */
-                               error = 0;
-                       }
                         truncated = 1;
                 }
         }
@@ -1704,16 +1853,16 @@ hfs_remove(ap)
         if (cp->c_flag & C_HARDLINK) {
                 struct cat_desc desc;
  
-               if ((ap->a_cnp->cn_flags & HASBUF) == 0 ||
-                   ap->a_cnp->cn_nameptr[0] == '\0') {
+               if ((cnp->cn_flags & HASBUF) == 0 ||
+                   cnp->cn_nameptr[0] == '\0') {
                         error = ENOENT; /* name missing! */
                         goto out;
                 }
  
                 /* Setup a descriptor for the link */
                 bzero(&desc, sizeof(desc));
-               desc.cd_nameptr = ap->a_cnp->cn_nameptr;
-               desc.cd_namelen = ap->a_cnp->cn_namelen;
+               desc.cd_nameptr = cnp->cn_nameptr;
+               desc.cd_namelen = cnp->cn_namelen;
                 desc.cd_parentcnid = dcp->c_cnid;
                 /* XXX - if cnid is out of sync then the wrong thread rec will get deleted. */
                 desc.cd_cnid = cp->c_cnid;
@@ -1742,7 +1891,7 @@ hfs_remove(ap)
                         bzero(&from_desc, sizeof(from_desc));
                         from_desc.cd_nameptr = inodename;
                         from_desc.cd_namelen = strlen(inodename);
-                       from_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+                       from_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                         from_desc.cd_flags = 0;
                         from_desc.cd_cnid = cp->c_fileid;
  
@@ -1750,7 +1899,7 @@ hfs_remove(ap)
                         bzero(&to_desc, sizeof(to_desc));
                         to_desc.cd_nameptr = delname;
                         to_desc.cd_namelen = strlen(delname);
-                       to_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+                       to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                         to_desc.cd_flags = 0;
                         to_desc.cd_cnid = cp->c_fileid;
         
@@ -1762,10 +1911,6 @@ hfs_remove(ap)
                 /* Unlock the Catalog */
                 (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
  
-               /* All done with component name... */
-               if ((ap->a_cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME))
-                       FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-
                 if (error != 0)
                         goto out;
  
@@ -1775,7 +1920,7 @@ hfs_remove(ap)
  
                 hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
  
-       } else if (dataforkbusy || rsrcforkbusy) {
+       } else if (dataforkbusy || rsrcforkbusy || isbigfile) {
                 char delname[32];
                 struct cat_desc to_desc;
                 struct cat_desc todir_desc;
@@ -1790,7 +1935,7 @@ hfs_remove(ap)
                 bzero(&to_desc, sizeof(to_desc));
                 to_desc.cd_nameptr = delname;
                 to_desc.cd_namelen = strlen(delname);
-               to_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+               to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                 to_desc.cd_flags = 0;
                 to_desc.cd_cnid = cp->c_cnid;
  
@@ -1820,10 +1965,16 @@ hfs_remove(ap)
  
         } else /* Not busy */ {
  
-               if (vp->v_type == VDIR && cp->c_entries > 0)
-                       panic("hfs_remove: attempting to delete a non-empty directory!");
-               if (vp->v_type != VDIR && cp->c_blocks > 0)
+               if (cp->c_blocks > 0) {
+#if 0
                         panic("hfs_remove: attempting to delete a non-empty file!");
+#else
+                       printf("hfs_remove: attempting to delete a non-empty file %s\n",
+                               cp->c_desc.cd_nameptr);
+                       error = EBUSY;
+                       goto out;
+#endif
+               }
  
                 /* Lock catalog b-tree */
                 error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
@@ -1832,11 +1983,11 @@ hfs_remove(ap)
  
                 error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
  
-               if (error && error != ENXIO && truncated) {
-                       if ((cp->c_datafork && cp->c_datafork->ff_data.cf_size != 0) ||
-                               (cp->c_rsrcfork && cp->c_rsrcfork->ff_data.cf_size != 0)) {
+               if (error && error != ENXIO && error != ENOENT && truncated) {
+                       if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
+                               (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
                                 panic("hfs: remove: couldn't delete a truncated file! (%d, data sz %lld; rsrc sz %lld)",
-                                         error, cp->c_datafork->ff_data.cf_size, cp->c_rsrcfork->ff_data.cf_size);
+                                         error, cp->c_datafork->ff_size, cp->c_rsrcfork->ff_size);
                         } else {
                                 printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
                                            cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error);
@@ -1852,6 +2003,7 @@ hfs_remove(ap)
  #endif /* QUOTA */
  
                 cp->c_mode = 0;
+               truncated  = 0;    // because the catalog entry is gone
                 cp->c_flag |= C_CHANGE | C_NOEXISTS;
                 --cp->c_nlink;
                 hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
@@ -1875,39 +2027,29 @@ hfs_remove(ap)
         dcp->c_flag |= C_CHANGE | C_UPDATE;
         tv = time;
         (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       HFS_KNOTE(dvp, NOTE_WRITE);
  
-       // XXXdbg
-       if (started_tr) {
-           journal_end_transaction(hfsmp->jnl);
-       }
-       if (grabbed_lock) {
-               hfs_global_shared_lock_release(hfsmp);
+out:
+       /* All done with component name... */
+       if ((options & HFSRM_SAVE_NAME) == 0 &&
+           (cnp != 0) &&
+           (cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME)) {
+               char *tmp = cnp->cn_pnbuf;
+               cnp->cn_pnbuf = NULL;
+               cnp->cn_flags &= ~HASBUF;
+               FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
         }
  
-       if (rvp)
-               vrele(rvp);
-       VOP_UNLOCK(vp, 0, p);
-       // XXXdbg - try to prevent the lost ubc_info panic
-       if ((cp->c_flag & C_HARDLINK) == 0 || cp->c_nlink == 0) {
-               (void) ubc_uncache(vp);
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               cat_postflight(hfsmp, &cookie, p);
         }
-       vrele(vp);
-       vput(dvp);
-
-       return (0);
  
-out:
-       if (rvp)
-               vrele(rvp);
-       
         /* Commit the truncation to the catalog record */
         if (truncated) {
-               cp->c_flag |= C_CHANGE | C_UPDATE;
-               tv = time;
-               (void) VOP_UPDATE(vp, &tv, &tv, 0);
+           cp->c_flag |= C_CHANGE | C_UPDATE | C_FORCEUPDATE;
+           tv = time;
+           (void) VOP_UPDATE(vp, &tv, &tv, 0);
         }
-       vput(vp);
-       vput(dvp);
  
         // XXXdbg
         if (started_tr) {
@@ -1917,6 +2059,26 @@ out:
                 hfs_global_shared_lock_release(hfsmp);
         }
  
+       HFS_KNOTE(vp, NOTE_DELETE);
+       if (rvp) {
+               HFS_KNOTE(rvp, NOTE_DELETE);
+               vrele(rvp);
+       };
+
+       if (error) {
+               vput(vp);
+       } else {
+               VOP_UNLOCK(vp, 0, p);
+               // XXXdbg - try to prevent the lost ubc_info panic
+               if ((cp->c_flag & C_HARDLINK) == 0 || cp->c_nlink == 0) {
+                       (void) ubc_uncache(vp);
+               }
+               vrele(vp);
+       }
+       if (!(options & HFSRM_PARENT_LOCKED)) {
+               vput(dvp);
+       }
+
         return (error);
  }
  
@@ -1931,7 +2093,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
                 cp->c_desc.cd_nameptr = 0;
                 cp->c_desc.cd_namelen = 0;
                 cp->c_desc.cd_flags &= ~CD_HASBUF;
-               FREE(name, M_TEMP);
+               remove_name(name);
         }
         bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc));
  
@@ -1944,19 +2106,11 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
  
  /*
  #
-#% rename      fdvp    U U U
-#% rename      fvp     U U U
-#% rename      tdvp    L U U
-#% rename      tvp     X U U
+#% rename      fdvp   U U U
+#% rename      fvp    U U U
+#% rename      tdvp   L U U
+#% rename      tvp    X U U
  #
-       vop_rename {
-               IN WILLRELE struct vnode *fdvp;
-               IN WILLRELE struct vnode *fvp;
-               IN struct componentname *fcnp;
-               IN WILLRELE struct vnode *tdvp;
-               IN WILLRELE struct vnode *tvp;
-               IN struct componentname *tcnp;
-       };
  */
  /*
   * Rename a cnode.
@@ -1995,53 +2149,114 @@ hfs_rename(ap)
         struct cat_desc from_desc;
         struct cat_desc to_desc;
         struct cat_desc out_desc;
-       struct hfsmount *hfsmp;
+       struct hfsmount *hfsmp = NULL;
         struct timeval tv;
-       int fdvp_locked, fvp_locked, tdvp_locked;
+       cat_cookie_t cookie = {0};
+       int fdvp_locked, fvp_locked, tdvp_locked, tvp_locked;
         int tvp_deleted;
         int started_tr = 0, grabbed_lock = 0;
         int error = 0;
  
-       hfsmp = VTOHFS(tdvp);
  
         /* Establish our vnode lock state. */
         tdvp_locked = 1;
+       tvp_locked = (tvp != 0);
         fdvp_locked = 0;
         fvp_locked = 0;
         tvp_deleted = 0;
  
+       /*
+        * Check for cross-device rename.
+        */
+       if ((fvp->v_mount != tdvp->v_mount) ||
+           (tvp && (fvp->v_mount != tvp->v_mount))) {
+               error = EXDEV;
+               goto out;
+       }
+
         /*
          * When fvp matches tvp they must be case variants
          * or hard links.
          *
-        * For the hardlink case there can be an extra ref on fvp.
+        * In some cases tvp will be locked in other cases
+        * it be unlocked with no reference.  Normalize the
+        * state here (unlocked with a reference) so that
+        * we can exit in a known state.
          */
         if (fvp == tvp) {
-               if (VOP_ISLOCKED(fvp) &&
-                       (VTOC(fvp)->c_lock.lk_lockholder == p->p_pid) &&
-                       (VTOC(fvp)->c_lock.lk_lockthread == current_thread())) {
-                       fvp_locked = 1;
-                       vrele(fvp);  /* drop the extra ref */
+               if (VOP_ISLOCKED(tvp) &&
+                   (VTOC(tvp)->c_lock.lk_lockholder == p->p_pid) &&
+                   (VTOC(tvp)->c_lock.lk_lockthread == current_thread())) {
+                       vput(tvp);
                 }
                 tvp = NULL;
+               tvp_locked = 0;
+
                 /*
-                * If this a hard link and its not a case
-                * variant then keep tvp around for removal.
+                * If this a hard link with different parents
+                * and its not a case variant then keep tvp
+                * around for removal.
                  */
                 if ((VTOC(fvp)->c_flag & C_HARDLINK) &&
                     ((fdvp != tdvp) ||
                      (hfs_namecmp(fcnp->cn_nameptr, fcnp->cn_namelen,
                                   tcnp->cn_nameptr, tcnp->cn_namelen) != 0))) {
                         tvp = fvp;
+                       vref(tvp);
                 }
         }
          
         /*
-        * Check for cross-device rename.
+        * The following edge case is caught here:
+        * (to cannot be a descendent of from)
+        *
+        *       o fdvp
+        *      /
+        *     /
+        *    o fvp
+        *     \
+        *      \
+        *       o tdvp
+        *      /
+        *     /
+        *    o tvp
          */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
-               error = EXDEV;
+       if (tdcp->c_parentcnid == VTOC(fvp)->c_cnid) {
+               error = EINVAL;
+               goto out;
+       }
+
+       /*
+        * The following two edge cases are caught here:
+        * (note tvp is not empty)
+        *
+        *       o tdvp               o tdvp
+        *      /                    /
+        *     /                    /
+        *    o tvp            tvp o fdvp
+        *     \                    \
+        *      \                    \
+        *       o fdvp               o fvp
+        *      /
+        *     /
+        *    o fvp
+        */
+       if (tvp && (tvp->v_type == VDIR) && (VTOC(tvp)->c_entries != 0)) {
+               error = ENOTEMPTY;
+               goto out;
+       }
+
+       /*
+        * The following edge case is caught here:
+        * (the from child and parent are the same)
+        *
+        *          o tdvp
+        *         /
+        *        /
+        *  fdvp o fvp
+        */
+       if (fdvp == fvp) {
+               error = EINVAL;
                 goto out;
         }
  
@@ -2054,16 +2269,7 @@ hfs_rename(ap)
                 goto out;
         }
  
-       /*
-        * Be sure we are not renaming ".", "..", or an alias of ".".
-        */
-       if ((fvp->v_type == VDIR) &&
-           (((fcnp->cn_namelen == 1) && (fcnp->cn_nameptr[0] == '.')) ||
-             (fdvp == fvp) ||
-             (fcnp->cn_flags&ISDOTDOT))) {
-                       error = EINVAL;
-                       goto out;
-       }
+       hfsmp = VTOHFS(tdvp);
  
         /*
          * If the destination parent directory is "sticky", then the
@@ -2071,157 +2277,138 @@ hfs_rename(ap)
          * the rename, otherwise the destination may not be changed
          * (except by root). This implements append-only directories.
          *
-        * Note that checks for immutable, write access, and a non-empty
-        * target are done by the call to VOP_REMOVE.
+        * Note that checks for immutable and write access are done
+        * by the call to VOP_REMOVE.
          */
         if (tvp && (tdcp->c_mode & S_ISTXT) &&
             (tcnp->cn_cred->cr_uid != 0) &&
             (tcnp->cn_cred->cr_uid != tdcp->c_uid) &&
             (hfs_owner_rights(hfsmp, VTOC(tvp)->c_uid, tcnp->cn_cred, p, false)) ) {
-                       error = EPERM;
+               error = EPERM;
+               goto out;
+       }
+
+#if QUOTA
+       if (tvp)
+               (void)hfs_getinoquota(VTOC(tvp));
+#endif
+
+       /*
+        * Lock all the vnodes before starting a journal transaction.
+        */
+
+       /*
+        * Simple case (same parent) - just lock child (fvp).
+        */
+       if (fdvp == tdvp) {
+               if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))
                         goto out;
+               fvp_locked = 1;
+               goto vnlocked;
         }
  
         /*
-        * All done with preflighting.
+        * If fdvp is the parent of tdvp then we'll need to
+        * drop tdvp's lock before acquiring a lock on fdvp.
          *
-        * We now break the call into two transactions:
-        * 1 - Remove the destionation (if any) using VOP_REMOVE,
-        * which in itself is a complete transaction.
+        *          fdvp
+        *           o 
+        *          / \
+        *         /   \
+        *   tdvp o     o fvp
+        *         \
+        *          \
+        *           o tvp
          *
-        * 2 - Rename source to destination.
-        * 
-        * Since all the preflighting is done, we assume that a
-        * rename failure is unlikely once part 1 is complete.
-        * Breaking rename into two transactions buys us a much
-        * simpler implementation with respect to the locking
-        * protocol.  There are only 3 vnodes to worry about
-        * locking in the correct order (instead of 4).
+        *
+        * If the parent directories are unrelated then we'll
+        * need to aquire their vnode locks in vnode address
+        * order.  Otherwise we can race with another rename
+        * call that involves the same vnodes except that to
+        * and from are switched and potentially deadlock.
+        * [ie rename("a/b", "c/d") vs rename("c/d", "a/b")]
+        *
+        * If its not either of the two above cases then we
+        * can safely lock fdvp and fvp.
          */
+       if ((VTOC(fdvp)->c_cnid == VTOC(tdvp)->c_parentcnid) ||
+           ((VTOC(tdvp)->c_cnid != VTOC(fdvp)->c_parentcnid) &&
+            (fdvp < tdvp))) {
+
+               /* Drop locks on tvp and tdvp */
+               if (tvp_locked) {
+                       VOP_UNLOCK(tvp, 0, p);
+                       tvp_locked = 0;
+               }
+               VOP_UNLOCK(tdvp, 0, p);
+               tdvp_locked = 0;
+
+               /* Aquire locks in correct order */
+               if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               fdvp_locked = 1;
+               if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               tdvp_locked = 1;
  
-       /*
-        * Part 1 - If the destination exists then it needs to be removed.
-        */
-       if (tvp) {
                 /*
-                * VOP_REMOVE will vput tdvp so we better bump its
-                * ref count and relockit, always set tvp to NULL
-                * afterwards to indicate that we're done with it.
+                * Now that the parents are locked only one thread
+                * can continue. So the lock order of the children
+                * doesn't really matter
                  */
-               VREF(tdvp);
-
                 if (tvp == fvp) {
-                       if (fvp_locked) {
-                               VREF(fvp);
-                       } else {
-                               error = vget(fvp, LK_EXCLUSIVE | LK_RETRY, p);
-                               if (error)
+                       if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                               goto out;
+                       tvp_locked = 1;
+               } else {
+                       if (tvp) {
+                               if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p)))
                                         goto out;
-                               fvp_locked = 1;
+                               tvp_locked = 1;
                         }
-               } else {
-                       cache_purge(tvp);
+                       if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                               goto out;
+                       fvp_locked = 1;
                 }
  
-               /* Clear SAVENAME to keep VOP_REMOVE from smashing tcnp. */
-               tcnp->cn_flags &= ~SAVENAME;
-
-               if (tvp->v_type == VDIR)
-                       error = VOP_RMDIR(tdvp, tvp, tcnp);
-               else
-                       error = VOP_REMOVE(tdvp, tvp, tcnp);
-
-               /* Get lock states back in sync. */
-               tdvp_locked = 0;
+       } else /* OK to lock fdvp and fvp */ {
+               if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               fdvp_locked = 1;
+               if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))
+                       goto out;
                 if (tvp == fvp)
-                       fvp_locked = 0; 
-               tvp = NULL;     /* all done with tvp */
-               tvp_deleted = 1;
-
-               if (error)
-                       goto out;  /* couldn't remove destination! */
+                       tvp_locked = 1;
+               else
+                       fvp_locked = 1;
         }
-       /*
-        * All done with tvp.
-        *
-        * For POSIX compliance, if tvp was removed the only 
-        * error we can return from this point on is EIO.
-        */  
  
-       /*
-        * Part 2 - rename source to destination
+vnlocked:
+       fdcp = VTOC(fdvp);
+       fcp = VTOC(fvp);
+
+       /* 
+        * While fvp is still locked, purge it from the name cache and
+        * grab it's c_cnid value.  Note that the removal of tvp (below)
+        * can drop fvp's lock when fvp == tvp.
          */
+       cache_purge(fvp);
  
         /*
-        * Lock the vnodes before starting a journal transaction.
+        * When a file moves out of "Cleanup At Startup"
+        * we can drop its NODUMP status.
          */
-       if (fdvp != tdvp) {
-               /*
-                * fvp is a child and must be locked last.
-                */
-               if (fvp_locked) {
-                       VOP_UNLOCK(fvp, 0, p);
-                       fvp_locked = 0;
-               }
-               /*
-                *      If fdvp is the parent of tdvp then it needs to be locked first.
-                */
-               if ((VTOC(fdvp)->c_cnid == VTOC(tdvp)->c_parentcnid)) {
-                       if (tdvp_locked) {
-                               VOP_UNLOCK(tdvp, 0, p);
-                               tdvp_locked = 0;
-                       }
-                       if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       fdvp_locked = 1;
-                       if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       tdvp_locked = 1;
-
-               } else /* Lock tdvp then fdvp */ {
-                       if (!tdvp_locked) {
-                               if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                                       goto out;
-                               tdvp_locked = 1;
-                       }
-                       if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       fdvp_locked = 1;
-               }
-       } else if (!tdvp_locked) {
-                       /*
-                        * fvp is a child and must be locked last.
-                        */
-                       if (fvp_locked) {
-                               VOP_UNLOCK(fvp, 0, p);
-                               fvp_locked = 0;
-                       }
-                       if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       tdvp_locked = 1;
-       }
-
-       /* Now its safe to lock fvp */
-       if (!fvp_locked) {
-               if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))
-                       goto out;
-               fvp_locked = 1;
-       }
-
-       fdcp = VTOC(fdvp);
-       fcp = VTOC(fvp);
-
-       hfs_global_shared_lock_acquire(hfsmp);
-       grabbed_lock = 1;
-       if (hfsmp->jnl) {
-           if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
-                       goto out;
-           }
-               started_tr = 1;
+       if ((fcp->c_flags & UF_NODUMP) &&
+           (fvp->v_type == VREG) &&
+           (fdvp != tdvp) &&
+           (fdcp->c_desc.cd_nameptr != NULL) &&
+           (strcmp(fdcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) {
+               fcp->c_flags &= ~UF_NODUMP;
+               fcp->c_flag |= C_CHANGE;
+               tv = time;
+               (void) VOP_UPDATE(fvp, &tv, &tv, 0);
         }
  
-       cache_purge(fvp);
-
         bzero(&from_desc, sizeof(from_desc));
         from_desc.cd_nameptr = fcnp->cn_nameptr;
         from_desc.cd_namelen = fcnp->cn_namelen;
@@ -2236,6 +2423,52 @@ hfs_rename(ap)
         to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
         to_desc.cd_cnid = fcp->c_cnid;
  
+       hfs_global_shared_lock_acquire(hfsmp);
+       grabbed_lock = 1;
+       if (hfsmp->jnl) {
+           if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
+                       goto out;
+           }
+               started_tr = 1;
+       }
+
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) {
+               goto out;
+       }
+
+       /*
+        * If the destination exists then it needs to be removed.
+        */
+
+       if (tvp) {
+               if (tvp != fvp)
+                       cache_purge(tvp);
+               /*
+                * Note that hfs_removedir and hfs_removefile
+                * will keep tdvp locked with a reference.
+                * But tvp will lose its lock and reference.
+                */
+               if (tvp->v_type == VDIR)
+                       error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS);
+               else
+                       error = hfs_removefile(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS);
+
+               if (tvp == fvp)
+                       fvp_locked = 0;
+               tvp = NULL;
+               tvp_locked = 0;
+               tvp_deleted = 1;
+               if (error)
+                       goto out;
+       }
+
+       /*
+        * All done with tvp and fvp
+        */
+
         /* Lock catalog b-tree */
         error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
         if (error)
@@ -2245,22 +2478,23 @@ hfs_rename(ap)
  
         /* Unlock catalog b-tree */
         (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-       if (error)
+
+       if (error) {
                 goto out;
+       }
  
         /* Update cnode's catalog descriptor */
-       replace_desc(fcp, &out_desc);
+       if (fvp_locked) {
+               replace_desc(fcp, &out_desc);
+               fcp->c_parentcnid = tdcp->c_cnid;
+               fcp->c_hint = 0;
+       }
  
         hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_RMDIR : VOL_RMFILE,
                       (fdcp->c_cnid == kHFSRootFolderID));
         hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_MKDIR : VOL_MKFILE,
                       (tdcp->c_cnid == kHFSRootFolderID));
  
-       VOP_UNLOCK(fvp, 0, p);
-       fcp = NULL;
-       fvp_locked = 0;
-       /* All done with fvp. */  
-
         /* Update both parent directories. */
         tv = time;
         if (fdvp != tdvp) {
@@ -2278,6 +2512,9 @@ hfs_rename(ap)
         (void) VOP_UPDATE(tdvp, &tv, &tv, 0);
  
  out:
+       if (hfsmp) {
+               cat_postflight(hfsmp, &cookie, p);
+       }
         if (started_tr) {
             journal_end_transaction(hfsmp->jnl);
         }
@@ -2285,6 +2522,14 @@ out:
                 hfs_global_shared_lock_release(hfsmp);
         }
  
+       /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have
+          generated a NOTE_WRITE for tdvp and a NOTE_DELETE for tvp.
+        */
+       if (error == 0) {
+               HFS_KNOTE(fvp, NOTE_RENAME);
+               HFS_KNOTE(fdvp, NOTE_WRITE);
+               if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE);
+       };
         if (fvp_locked) {
                 VOP_UNLOCK(fvp, 0, p);
         }
@@ -2294,18 +2539,18 @@ out:
         if (tdvp_locked) {
                 VOP_UNLOCK(tdvp, 0, p);
         }
-       if (tvp && (tvp != fvp)) {
-               if (tvp != tdvp)
-                       VOP_UNLOCK(tvp, 0, p);
-               vrele(tvp);
+       if (tvp_locked) {
+               VOP_UNLOCK(tvp, 0, p);
         }
  
         vrele(fvp);
         vrele(fdvp);
+       if (tvp)
+               vrele(tvp);
         vrele(tdvp);
  
         /* After tvp is removed the only acceptable error is EIO */
-       if ((error == ENOSPC) && tvp_deleted)
+       if (error && tvp_deleted)
                 error = EIO;
  
         return (error);
@@ -2407,7 +2652,6 @@ hfs_symlink(ap)
         vp = *vpp;
         len = strlen(ap->a_target);
         fp = VTOF(vp);
-       fp->ff_clumpsize = VTOVCB(vp)->blockSize;
  
  #if QUOTA
         (void)hfs_getinoquota(VTOC(vp));
@@ -2536,6 +2780,10 @@ hfs_readdir(ap)
         int eofflag = 0;
         void *user_start = NULL;
         int   user_len;
+
+       int ncookies=0;
+       u_long *cookies=NULL;
+       u_long *cookiep=NULL;
   
         /* We assume it's all one big buffer... */
         if (uio->uio_iovcnt > 1 || uio->uio_resid < AVERAGE_HFSDIRENTRY_SIZE)
@@ -2568,7 +2816,6 @@ hfs_readdir(ap)
                 }
         }
  
-
         /* Create the entries for . and .. */
         if (uio->uio_offset < sizeof(rootdots)) {
                 caddr_t dep;
@@ -2593,10 +2840,58 @@ hfs_readdir(ap)
                         goto Exit;
         }
  
+       if (ap->a_ncookies != NULL) {
+               /*
+                * These cookies are handles that allow NFS to restart
+                * scanning through a directory.  If a directory is large
+                * enough, NFS will issue a successive readdir() with a
+                * uio->uio_offset that is equal to one of these cookies.
+                *
+                * The cookies that we generate are synthesized byte-offsets.
+                * The offset is where the dirent the dirent would be if the
+                * directory were an array of packed dirent structs.  It is
+                * synthetic because that's not how directories are stored in
+                * HFS but other code expects that the cookie is a byte offset.
+                *
+                * We have to pre-allocate the cookies because cat_getdirentries()
+                * is the only one that can properly synthesize the offsets (since
+                * it may have to skip over entries and only it knows the true
+                * virtual offset of any particular directory entry). So we allocate
+                * a cookie table here and pass it in to cat_getdirentries().
+                *
+                * Note that the handling of "." and ".." is mostly done here but
+                * cat_getdirentries() is aware of.
+                *
+                * Only the NFS server uses cookies so fortunately this code is
+                * not executed unless the NFS server is issuing the readdir
+                * request.
+                *
+                * Also note that the NFS server is the one responsible for
+                * free'ing the cookies even though we allocated them.  Ick.
+                *
+                * We allocate a reasonable number of entries for the size of
+                * the buffer that we're going to fill in.  cat_getdirentries()
+                * is smart enough to not overflow if there's more room in the
+                * buffer but not enough room in the cookie table.
+                */
+               if (uio->uio_segflg != UIO_SYSSPACE)
+                       panic("hfs_readdir: unexpected uio from NFS server");
+
+               ncookies = uio->uio_iov->iov_len / (AVERAGE_HFSDIRENTRY_SIZE/2);
+               MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
+
+               *ap->a_ncookies = ncookies;
+               *ap->a_cookies = cookies;
+       }
+
         /* If there are no children then we're done */  
         if (cp->c_entries == 0) {
                 eofflag = 1;
                 retval = 0;
+               if (cookies) {
+                   cookies[0] = 0;
+                   cookies[1] = sizeof(struct hfsdotentry);
+               }
                 goto Exit;
         }
  
@@ -2604,7 +2899,7 @@ hfs_readdir(ap)
         retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p);
         if (retval) goto Exit;
  
-       retval = cat_getdirentries(hfsmp, &cp->c_desc, uio, &eofflag);
+       retval = cat_getdirentries(hfsmp, &cp->c_desc, cp->c_entries, uio, &eofflag, cookies, ncookies);
  
         /* Unlock catalog b-tree */
         (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
@@ -2620,38 +2915,6 @@ hfs_readdir(ap)
         }
         
         cp->c_flag |= C_ACCESS;
-                                                                                                                       /* Bake any cookies */
-       if (!retval && ap->a_ncookies != NULL) {
-               struct dirent* dpStart;
-               struct dirent* dpEnd;
-               struct dirent* dp;
-               int ncookies;
-               u_long *cookies;
-               u_long *cookiep;
-
-               /*
-                * Only the NFS server uses cookies, and it loads the
-                * directory block into system space, so we can just look at
-                * it directly.
-                */
-               if (uio->uio_segflg != UIO_SYSSPACE)
-                       panic("hfs_readdir: unexpected uio from NFS server");
-               dpStart = (struct dirent *)(uio->uio_iov->iov_base - (uio->uio_offset - off));
-               dpEnd = (struct dirent *) uio->uio_iov->iov_base;
-               for (dp = dpStart, ncookies = 0;
-                    dp < dpEnd && dp->d_reclen != 0;
-                    dp = (struct dirent *)((caddr_t)dp + dp->d_reclen))
-                       ncookies++;
-               MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
-               for (dp = dpStart, cookiep = cookies;
-                    dp < dpEnd;
-                    dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
-                       off += dp->d_reclen;
-                       *cookiep++ = (u_long) off;
-               }
-               *ap->a_ncookies = ncookies;
-               *ap->a_cookies = cookies;
-       }
  
  Exit:;
         if (hfsmp->jnl && user_start) {
@@ -2727,40 +2990,28 @@ hfs_readlink(ap)
                 }
         }
         retval = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio);
-
-       return (retval);
-}
-
-
-/*
- * hfs abort op, called after namei() when a CREATE/DELETE isn't actually
- * done. If a buffer has been saved in anticipation of a CREATE, delete it.
-#% abortop     dvp     = = =
-#
- vop_abortop {
-     IN struct vnode *dvp;
-     IN struct componentname *cnp;
-
-     */
-
-/* ARGSUSED */
-
-static int
-hfs_abortop(ap)
-       struct vop_abortop_args /* {
-               struct vnode *a_dvp;
-               struct componentname *a_cnp;
-       } */ *ap;
-{
-       if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
-               FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-               ap->a_cnp->cn_flags &= ~HASBUF;
+#if 1
+       /*
+        * Keep track blocks read
+        */
+       if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (retval == 0)) {
+               
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+                       VTOF(vp)->ff_bytesread = fp->ff_size;
+               else
+                       VTOF(vp)->ff_bytesread += fp->ff_size;
+               
+       //      if (VTOF(vp)->ff_bytesread > fp->ff_size)
+       //              cp->c_flag |= C_ACCESS;
         }
-
-       return (0);
+#endif
+       return (retval);
  }
  
-
  /*
   * Lock an cnode. If its already locked, set the WANT bit and sleep.
  #% lock                vp      U L U
@@ -2782,9 +3033,6 @@ hfs_lock(ap)
         struct vnode *vp = ap->a_vp;
         struct cnode *cp = VTOC(vp);
  
-       if (cp == NULL)
-               panic("hfs_lock: cnode in vnode is null\n");
-
         return (lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p));
  }
  
@@ -2808,10 +3056,12 @@ hfs_unlock(ap)
  {
         struct vnode *vp = ap->a_vp;
         struct cnode *cp = VTOC(vp);
-
-       if (cp == NULL)
-               panic("hfs_unlock: cnode in vnode is null\n");
-
+#if 0
+       if (!lockstatus(&cp->c_lock)) {
+               printf("hfs_unlock: vnode %s wasn't locked!\n",
+                       cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
+       }
+#endif
         return (lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE,
                 &vp->v_interlock, ap->a_p));
  }
@@ -2895,6 +3145,9 @@ hfs_pathconf(ap)
         case _PC_PATH_MAX:
                 *ap->a_retval = PATH_MAX; /* 1024 */
                 break;
+       case _PC_PIPE_BUF:
+               *ap->a_retval = PIPE_BUF;
+               break;
         case _PC_CHOWN_RESTRICTED:
                 *ap->a_retval = 1;
                 break;
@@ -2905,7 +3158,10 @@ hfs_pathconf(ap)
                 *ap->a_retval = kHFSPlusMaxFileNameChars;
                 break;
         case _PC_CASE_SENSITIVE:
-               *ap->a_retval = 0;
+               if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE)
+                       *ap->a_retval = 1;
+               else
+                       *ap->a_retval = 0;
                 break;
         case _PC_CASE_PRESERVING:
                 *ap->a_retval = 1;
@@ -2981,12 +3237,16 @@ hfs_advlock(ap)
                 return (EINVAL);
         }
  
-       if (start < 0)
-               return (EINVAL);
         if (fl->l_len == 0)
                 end = -1;
-       else
+       else if (fl->l_len > 0)
                 end = start + fl->l_len - 1;
+       else { /* l_len is negative */
+               end = start - 1;
+               start += fl->l_len;
+       }
+       if (start < 0)
+               return (EINVAL);
  
         /*
          * Create the hfslockf structure
@@ -3064,14 +3324,14 @@ hfs_update(ap)
         hfsmp = VTOHFS(vp);
  
         /* XXX do we really want to clear the sytem cnode flags here???? */
-       if ((vp->v_flag & VSYSTEM) ||
-           (VTOVFS(vp)->mnt_flag & MNT_RDONLY) ||
+       if (((vp->v_flag & VSYSTEM) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))||
+           (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) ||
             (cp->c_mode == 0)) {
                 cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
                 return (0);
         }
  
-       updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
+       updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE);
  
         /* Nothing to update. */
         if (updateflag == 0) {
@@ -3083,23 +3343,15 @@ hfs_update(ap)
         }
         if (updateflag & C_ACCESS) {
                 /*
-                * If only the access time is changing then defer
-                * updating it on-disk util later (in hfs_inactive).
-                * If it was recently updated then skip the update.
+                * When the access time is the only thing changing
+                * then make sure its sufficiently newer before
+                * committing it to disk.
                  */
-               if (updateflag == C_ACCESS) {
-                       cp->c_flag &= ~C_ACCESS;
-       
-                       /* Its going to disk or its sufficiently newer... */
-                       if ((cp->c_flag & C_ATIMEMOD) ||
-                           (ap->a_access->tv_sec > (cp->c_atime + ATIME_ACCURACY))) {
-                               cp->c_atime = ap->a_access->tv_sec;
-                               cp->c_flag |= C_ATIMEMOD;
-                       }
+               if ((updateflag == C_ACCESS) &&
+                   (ap->a_access->tv_sec < (cp->c_atime + ATIME_ONDISK_ACCURACY))) {
                         return (0);
-               } else {
-                       cp->c_atime = ap->a_access->tv_sec;
                 }
+               cp->c_atime = ap->a_access->tv_sec;
         }
         if (updateflag & C_UPDATE) {
                 cp->c_mtime = ap->a_modify->tv_sec;
@@ -3129,15 +3381,21 @@ hfs_update(ap)
          * gets written to disk.
          *
          * Deleted files can defer meta data updates until inactive.
+        *
+        * If we're ever called with the C_FORCEUPDATE flag though
+        * we have to do the update.
          */
-       if (ISSET(cp->c_flag, C_DELETED) ||
+       if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 &&
+           (ISSET(cp->c_flag, C_DELETED) || 
             (dataforkp && cp->c_datafork->ff_unallocblocks) ||
-           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks)) {
+           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) {
                 if (updateflag & (C_CHANGE | C_UPDATE))
                         hfs_volupdate(hfsmp, VOL_UPDATE, 0);    
                 cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
                 cp->c_flag |= C_MODIFIED;
  
+               HFS_KNOTE(vp, NOTE_ATTRIB);
+
                 return (0);
         }
  
@@ -3161,6 +3419,19 @@ hfs_update(ap)
                 bcopy(dataforkp, &datafork, sizeof(datafork));
                 datafork.cf_size = CIRCLEQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
                 dataforkp = &datafork;
+       } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) {
+               // always make sure the block count and the size 
+               // of the file match the number of blocks actually
+               // allocated to the file on disk
+               bcopy(dataforkp, &datafork, sizeof(datafork));
+               // make sure that we don't assign a negative block count
+               if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
+                   panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+                         cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+               }
+               datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
+               datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               dataforkp = &datafork;
         }
  
         /*
@@ -3183,18 +3454,20 @@ hfs_update(ap)
          /* Unlock the Catalog b-tree file. */
         (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
  
-       if (updateflag & (C_CHANGE | C_UPDATE))
+       if (updateflag & (C_CHANGE | C_UPDATE | C_FORCEUPDATE))
                 hfs_volupdate(hfsmp, VOL_UPDATE, 0);    
  
+       /* After the updates are finished, clear the flags */
+       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE);
+
         // XXXdbg
         if (hfsmp->jnl) {
             journal_end_transaction(hfsmp->jnl);
         }
         hfs_global_shared_lock_release(hfsmp);
  
-       /* After the updates are finished, clear the flags */
-       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_ATIMEMOD);
-
+       HFS_KNOTE(vp, NOTE_ATTRIB);
+       
         return (error);
  }
  
@@ -3219,6 +3492,7 @@ hfs_makenode(mode, dvp, vpp, cnp)
         struct proc *p;
         struct cat_desc in_desc, out_desc;
         struct cat_attr attr;
+       cat_cookie_t cookie = {0};
         int error, started_tr = 0, grabbed_lock = 0;
         enum vtype vnodetype;
  
@@ -3305,6 +3579,17 @@ hfs_makenode(mode, dvp, vpp, cnp)
                 started_tr = 1;
         }
  
+       /*
+        * Reserve some space in the Catalog file.
+        *
+        * (we also add CAT_DELETE since our getnewvnode
+        *  request can cause an hfs_inactive call to
+        *  delete an unlinked file)
+        */
+       if ((error = cat_preflight(hfsmp, CAT_CREATE | CAT_DELETE, &cookie, p))) {
+               goto exit;
+       }
+
         /* Lock catalog b-tree */
         error = hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_EXCLUSIVE, p);
         if (error)
@@ -3324,6 +3609,11 @@ hfs_makenode(mode, dvp, vpp, cnp)
         dcp->c_flag |= C_CHANGE | C_UPDATE;
         tv = time;
         (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       if (vnodetype == VDIR) {
+               HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
+       } else {
+               HFS_KNOTE(dvp, NOTE_WRITE);
+       };
  
         hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE,
                 (dcp->c_cnid == kHFSRootFolderID));
@@ -3354,6 +3644,8 @@ hfs_makenode(mode, dvp, vpp, cnp)
         if (error)
                 goto exit;
  
+       // XXXdbg
+       cache_enter(dvp, tvp, cnp);
  
  #if QUOTA
         cp = VTOC(tvp);
@@ -3364,16 +3656,15 @@ hfs_makenode(mode, dvp, vpp, cnp)
         */
         if ((error = hfs_getinoquota(cp)) ||
             (error = hfs_chkiq(cp, 1, cnp->cn_cred, FORCE))) {
-               if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
-                       FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-                       cnp->cn_flags &= ~HASBUF;
-               }
                 if (tvp->v_type == VDIR)
                         VOP_RMDIR(dvp,tvp, cnp);
                 else
                         VOP_REMOVE(dvp,tvp, cnp);
  
-               return (error);
+               // because VOP_RMDIR and VOP_REMOVE already
+               // have done the vput()
+               dvp = NULL;
+               goto exit;
         }
  #endif /* QUOTA */
  
@@ -3398,11 +3689,45 @@ hfs_makenode(mode, dvp, vpp, cnp)
  exit:
         cat_releasedesc(&out_desc);
  
-       if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
-               FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-       vput(dvp);
+       cat_postflight(hfsmp, &cookie, p);
+
+       if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
+               char *tmp = cnp->cn_pnbuf;
+               cnp->cn_pnbuf = NULL;
+               cnp->cn_flags &= ~HASBUF;
+               FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
+       }
+       /*
+        * Check if a file is located in the "Cleanup At Startup"
+        * directory.  If it is then tag it as NODUMP so that we
+        * can be lazy about zero filling data holes.
+        */
+       if ((error == 0) && dvp && (vnodetype == VREG) &&
+           (dcp->c_desc.cd_nameptr != NULL) &&
+           (strcmp(dcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) {
+               struct vnode *ddvp;
+               cnid_t parid;
+
+               parid = dcp->c_parentcnid;
+               vput(dvp);
+               dvp = NULL;
+
+               /*
+                * The parent of "Cleanup At Startup" should
+                * have the ASCII name of the userid.
+                */
+               if (VFS_VGET(HFSTOVFS(hfsmp), &parid, &ddvp) == 0) {
+                       if (VTOC(ddvp)->c_desc.cd_nameptr &&
+                           (cp->c_uid == strtoul(VTOC(ddvp)->c_desc.cd_nameptr, 0, 0))) {
+                               cp->c_flags |= UF_NODUMP;
+                               cp->c_flag |= C_CHANGE;
+                       }
+                       vput(ddvp);
+               }
+       }
+       if (dvp)
+               vput(dvp);
  
-       // XXXdbg
         if (started_tr) {
             journal_end_transaction(hfsmp->jnl);
                 started_tr = 0;
@@ -3462,6 +3787,158 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, stru
  }
  
  
+static void
+filt_hfsdetach(struct knote *kn)
+{
+       struct vnode *vp;
+       int result;
+       struct proc *p = current_proc();
+       
+       vp = (struct vnode *)kn->kn_hook;
+       if (1) {        /* ! KNDETACH_VNLOCKED */
+               result = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+               if (result) return;
+       };
+
+       result = KNOTE_DETACH(&VTOC(vp)->c_knotes, kn);
+       
+       if (1) {        /* ! KNDETACH_VNLOCKED */
+               VOP_UNLOCK(vp, 0, p);
+       };
+}
+
+/*ARGSUSED*/
+static int
+filt_hfsread(struct knote *kn, long hint)
+{
+       struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
+
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);
+       }
+
+       kn->kn_data = VTOF(vp)->ff_size - kn->kn_fp->f_offset;
+       return (kn->kn_data != 0);
+}
+
+/*ARGSUSED*/
+static int
+filt_hfswrite(struct knote *kn, long hint)
+{
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+       }
+       
+       kn->kn_data = 0;
+       return (1);
+}
+
+static int
+filt_hfsvnode(struct knote *kn, long hint)
+{
+
+       if (kn->kn_sfflags & hint)
+               kn->kn_fflags |= hint;
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= EV_EOF;
+               return (1);
+       }
+       return (kn->kn_fflags != 0);
+}
+
+static struct filterops hfsread_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfsread };
+static struct filterops hfswrite_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfswrite };
+static struct filterops hfsvnode_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfsvnode };
+
+/*
+ #
+ #% kqfilt_add vp      L L L
+ #
+ vop_kqfilt_add
+       IN struct vnode *vp;
+       IN struct knote *kn;
+       IN struct proc *p;
+ */
+static int
+hfs_kqfilt_add(ap)
+       struct vop_kqfilt_add_args /* {
+               struct vnode *a_vp;
+               struct knote *a_kn;
+               struct proc *p;
+       } */ *ap;
+{
+       struct vnode *vp = ap->a_vp;
+       struct knote *kn = ap->a_kn;
+
+       switch (kn->kn_filter) {
+       case EVFILT_READ:
+               if (vp->v_type == VREG) {
+                       kn->kn_fop = &hfsread_filtops;
+               } else {
+                       return EINVAL;
+               };
+               break;
+       case EVFILT_WRITE:
+               if (vp->v_type == VREG) {
+                       kn->kn_fop = &hfswrite_filtops;
+               } else {
+                       return EINVAL;
+               };
+               break;
+       case EVFILT_VNODE:
+               kn->kn_fop = &hfsvnode_filtops;
+               break;
+       default:
+               return (1);
+       }
+
+       kn->kn_hook = (caddr_t)vp;
+
+       /* simple_lock(&vp->v_pollinfo.vpi_lock); */
+       KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn);
+       /* simple_unlock(&vp->v_pollinfo.vpi_lock); */
+
+       return (0);
+}
+
+/*
+ #
+ #% kqfilt_remove      vp      L L L
+ #
+ vop_kqfilt_remove
+       IN struct vnode *vp;
+       IN uintptr_t ident;
+       IN struct proc *p;
+ */
+static int
+hfs_kqfilt_remove(ap)
+       struct vop_kqfilt_remove_args /* {
+               struct vnode *a_vp;
+               uintptr_t ident;
+               struct proc *p;
+       } */ *ap;
+{
+       struct vnode *vp = ap->a_vp;
+       uintptr_t ident = ap->a_ident;
+       int result;
+
+       result = ENOTSUP; /* XXX */
+       
+       return (result);
+}
+
  /*
   * Wrapper for special device reads
   */
@@ -3591,6 +4068,43 @@ hfsfifo_close(ap)
         simple_unlock(&vp->v_interlock);
         return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
  }
+
+/*
+ * kqfilt_add wrapper for fifos.
+ *
+ * Fall through to hfs kqfilt_add routines if needed 
+ */
+int
+hfsfifo_kqfilt_add(ap)
+       struct vop_kqfilt_add_args *ap;
+{
+       extern int (**fifo_vnodeop_p)(void *);
+       int error;
+
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_add), ap);
+       if (error)
+               error = hfs_kqfilt_add(ap);
+       return (error);
+}
+
+/*
+ * kqfilt_remove wrapper for fifos.
+ *
+ * Fall through to hfs kqfilt_remove routines if needed 
+ */
+int
+hfsfifo_kqfilt_remove(ap)
+       struct vop_kqfilt_remove_args *ap;
+{
+       extern int (**fifo_vnodeop_p)(void *);
+       int error;
+
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_remove), ap);
+       if (error)
+               error = hfs_kqfilt_remove(ap);
+       return (error);
+}
+
  #endif /* FIFO */
  
  
@@ -3641,6 +4155,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
      { &vop_write_desc, (VOPFUNC)hfs_write },                   /* write */
      { &vop_ioctl_desc, (VOPFUNC)hfs_ioctl },                   /* ioctl */
      { &vop_select_desc, (VOPFUNC)hfs_select },                 /* select */
+    { &vop_revoke_desc, (VOPFUNC)nop_revoke },                 /* revoke */
      { &vop_exchange_desc, (VOPFUNC)hfs_exchange },             /* exchange */
      { &vop_mmap_desc, (VOPFUNC)err_mmap },                     /* mmap */
      { &vop_fsync_desc, (VOPFUNC)hfs_fsync },                   /* fsync */
@@ -3657,7 +4172,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
      { &vop_readdir_desc, (VOPFUNC)hfs_readdir },               /* readdir */
      { &vop_readdirattr_desc, (VOPFUNC)hfs_readdirattr },  /* readdirattr */
      { &vop_readlink_desc, (VOPFUNC)hfs_readlink },             /* readlink */
-    { &vop_abortop_desc, (VOPFUNC)hfs_abortop },               /* abortop */
+    { &vop_abortop_desc, (VOPFUNC)nop_abortop },               /* abortop */
      { &vop_inactive_desc, (VOPFUNC)hfs_inactive },             /* inactive */
      { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim },               /* reclaim */
      { &vop_lock_desc, (VOPFUNC)hfs_lock },                     /* lock */
@@ -3680,6 +4195,8 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
      { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },             /* blktooff */
      { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },             /* offtoblk */
      { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                     /* cmap */
+       { &vop_kqfilt_add_desc, (VOPFUNC)hfs_kqfilt_add },              /* kqfilt_add */
+       { &vop_kqfilt_remove_desc, (VOPFUNC)hfs_kqfilt_remove },                /* kqfilt_remove */
      { NULL, (VOPFUNC)NULL }
  };
  
@@ -3711,6 +4228,7 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = {
         { &vop_rename_desc, (VOPFUNC)spec_rename },             /* rename */
         { &vop_mkdir_desc, (VOPFUNC)spec_mkdir },               /* mkdir */
         { &vop_rmdir_desc, (VOPFUNC)spec_rmdir },               /* rmdir */
+       { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist },
         { &vop_symlink_desc, (VOPFUNC)spec_symlink },           /* symlink */
         { &vop_readdir_desc, (VOPFUNC)spec_readdir },           /* readdir */
         { &vop_readlink_desc, (VOPFUNC)spec_readlink },         /* readlink */
@@ -3769,6 +4287,7 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = {
         { &vop_rename_desc, (VOPFUNC)fifo_rename },             /* rename */
         { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir },               /* mkdir */
         { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir },               /* rmdir */
+       { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist },
         { &vop_symlink_desc, (VOPFUNC)fifo_symlink },           /* symlink */
         { &vop_readdir_desc, (VOPFUNC)fifo_readdir },           /* readdir */
         { &vop_readlink_desc, (VOPFUNC)fifo_readlink },         /* readlink */
@@ -3796,6 +4315,8 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = {
         { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },          /* blktooff */
         { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },          /* offtoblk */
         { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                  /* cmap */
+       { &vop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add },  /* kqfilt_add */
+       { &vop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove },  /* kqfilt_remove */
         { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL }
  };
  struct vnodeopv_desc hfs_fifoop_opv_desc =