]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfs_vnops.c
xnu-517.7.21.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vnops.c
index b53d8eb2fb41c44190fde32d4a50c12df599f11c..be91f25e68e3045af505d3bafb1388762ce6e779 100644 (file)
@@ -1,24 +1,21 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
  * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -35,6 +32,8 @@
 #include <sys/namei.h>
 #include <sys/ubc.h>
 #include <sys/quota.h>
+#include <sys/time.h>
+#include <sys/disk.h>
 
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/fifofs/fifo.h>
 #define MAKE_DELETED_NAME(NAME,FID) \
            (void) sprintf((NAME), "%s%d", HFS_DELETE_PREFIX, (FID))
 
+#define KNDETACH_VNLOCKED 0x00000001
 
-extern uid_t console_user;
+#define CARBON_TEMP_DIR_NAME   "Cleanup At Startup"
 
-extern unsigned long strtoul(const char *, char **, int);
 
 /* Global vfs data structures for hfs */
 
 
+extern unsigned long strtoul(const char *, char **, int);
+
 extern int groupmember(gid_t gid, struct ucred *cred);
 
 static int hfs_makenode(int mode, struct vnode *dvp, struct vnode **vpp,
@@ -76,6 +77,19 @@ static int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
 
 static int hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p);
 
+static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
+                         int);
+
+static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
+                          int);
+
+/* Options for hfs_removedir and hfs_removefile */
+#define HFSRM_PARENT_LOCKED    0x01
+#define HFSRM_SKIP_RESERVE     0x02
+#define HFSRM_SAVE_NAME        0x04
+#define HFSRM_RENAMEOPTS       0x07
+
+
 int hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags);
 
 int hfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
@@ -200,6 +214,8 @@ hfs_open(ap)
        } */ *ap;
 {
        struct vnode *vp = ap->a_vp;
+       struct filefork *fp = VTOF(vp);
+       struct timeval tv;
 
        /*
         * Files marked append-only must be opened for appending.
@@ -208,6 +224,36 @@ hfs_open(ap)
            (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
                return (EPERM);
 
+       if (ap->a_mode & O_EVTONLY) {
+               if (vp->v_type == VREG) {
+                       ++VTOF(vp)->ff_evtonly_refs;
+               } else {
+                       ++VTOC(vp)->c_evtonly_refs;
+               };
+       };
+
+       /*
+        * On the first (non-busy) open of a fragmented
+        * file attempt to de-frag it (if its less than 20MB).
+        */
+       if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) ||
+           !UBCISVALID(vp) || ubc_isinuse(vp, 1)) {
+               return (0);
+       }
+       fp = VTOF(vp);
+       if (fp->ff_blocks &&
+           fp->ff_extents[7].blockCount != 0 &&
+           fp->ff_size <= (20 * 1024 * 1024)) {
+               /* 
+                * Wait until system bootup is done (3 min).
+                */
+               microuptime(&tv);
+               if (tv.tv_sec < (60 * 3)) {
+                       return (0);
+               }
+               (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, ap->a_cred, ap->a_p);
+       }
+
        return (0);
 }
 
@@ -252,6 +298,14 @@ hfs_close(ap)
        }
        simple_unlock(&vp->v_interlock);
 
+       if (ap->a_fflag & O_EVTONLY) {
+               if (vp->v_type == VREG) {
+                       --VTOF(vp)->ff_evtonly_refs;
+               } else {
+                       --VTOC(vp)->c_evtonly_refs;
+               };
+       };
+
        /*
         * VOP_CLOSE can be called with vp locked (from vclean).
         * We check for this case using VOP_ISLOCKED and bail.
@@ -263,7 +317,9 @@ hfs_close(ap)
 
        leof = fp->ff_size;
        
-       if ((fp->ff_blocks > 0) && !ISSET(cp->c_flag, C_DELETED)) {
+       if ((fp->ff_blocks > 0) &&
+           !ISSET(cp->c_flag, C_DELETED) &&
+           ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) == 0)) {
                enum vtype our_type = vp->v_type;
                u_long our_id = vp->v_id;
                int was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA);
@@ -336,6 +392,8 @@ hfs_close(ap)
                }
                VOP_UNLOCK(vp, 0, p);
        }
+       if ((vp->v_flag & VSYSTEM) && (vp->v_usecount == 1))
+               vgone(vp);
        return (0);
 }
 
@@ -378,7 +436,7 @@ hfs_access(ap)
                case VDIR:
                case VLNK:
                case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+                       if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                                return (EROFS);
 #if QUOTA
                        if ((error = hfs_getinoquota(cp)))
@@ -386,20 +444,20 @@ hfs_access(ap)
 #endif /* QUOTA */
                        break;
                }
+               /* If immutable bit set, nobody gets to write it. */
+               if (cp->c_flags & IMMUTABLE)
+                       return (EPERM);
        }
 
-       /* If immutable bit set, nobody gets to write it. */
-       if ((mode & VWRITE) && (cp->c_flags & IMMUTABLE))
-               return (EPERM);
 
        /* Otherwise, user id 0 always gets access. */
-       if (ap->a_cred->cr_uid == 0)
+       if (cred->cr_uid == 0)
                return (0);
 
        mask = 0;
 
        /* Otherwise, check the owner. */
-       if (hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, ap->a_p, false) == 0) {
+       if ( (cp->c_uid == cred->cr_uid) || (cp->c_uid == UNKNOWNUID) ) {
                if (mode & VEXEC)
                        mask |= S_IXUSR;
                if (mode & VREAD)
@@ -466,6 +524,8 @@ hfs_getattr(ap)
        CTIMES(cp, &tv, &tv);
 
        vap->va_type = vp->v_type;
+       vap->va_mode = cp->c_mode;
+       vap->va_nlink = cp->c_nlink;
        /*
         * [2856576]  Since we are dynamically changing the owner, also
         * effectively turn off the set-user-id and set-group-id bits,
@@ -473,9 +533,12 @@ hfs_getattr(ap)
         * a security hole where set-user-id programs run as whoever is
         * logged on (or root if nobody is logged in yet!)
         */
-       vap->va_mode = (cp->c_uid == UNKNOWNUID) ? cp->c_mode & ~(S_ISUID | S_ISGID) : cp->c_mode;
-       vap->va_nlink = cp->c_nlink;
-       vap->va_uid = (cp->c_uid == UNKNOWNUID) ? console_user : cp->c_uid;
+       if (cp->c_uid == UNKNOWNUID) {
+               vap->va_mode &= ~(S_ISUID | S_ISGID);
+               vap->va_uid = ap->a_cred->cr_uid;
+       } else {
+               vap->va_uid = cp->c_uid;
+       }
        vap->va_gid = cp->c_gid;
        vap->va_fsid = cp->c_dev;
        /*
@@ -502,7 +565,6 @@ hfs_getattr(ap)
        vap->va_rdev = 0;
        vap->va_blocksize = VTOVFS(vp)->mnt_stat.f_iosize;
        vap->va_filerev = 0;
-       vap->va_spare = 0;
        if (vp->v_type == VDIR) {
                vap->va_size = cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE;
                vap->va_bytes = 0;
@@ -555,8 +617,19 @@ hfs_setattr(ap)
                return (EINVAL);
        }
 
+       // XXXdbg
+       // don't allow people to set the attributes of symlinks
+       // (nfs has a bad habit of doing ths and it can cause
+       // problems for journaling).
+       //
+       if (vp->v_type == VLNK) {
+               return 0;
+       }
+
+
+
        if (vap->va_flags != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                        return (EROFS);
                if ((error = hfs_chflags(vp, vap->va_flags, cred, p)))
                        return (error);
@@ -571,7 +644,7 @@ hfs_setattr(ap)
        if (VTOHFS(vp)->jnl && cp->c_datafork) {
                struct HFSPlusExtentDescriptor *extd;
 
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
                        return EPERM;
                }
@@ -581,7 +654,7 @@ hfs_setattr(ap)
         * Go through the fields and update iff not VNOVAL.
         */
        if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                        return (EROFS);
                if ((error = hfs_chown(vp, vap->va_uid, vap->va_gid, cred, p)))
                        return (error);
@@ -597,7 +670,7 @@ hfs_setattr(ap)
                        return (EISDIR);
                case VLNK:
                case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+                       if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                                return (EROFS);
                        break;
                default:
@@ -608,7 +681,7 @@ hfs_setattr(ap)
        }
        cp = VTOC(vp);
        if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                        return (EROFS);
                if (((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) &&
                    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
@@ -640,10 +713,11 @@ hfs_setattr(ap)
        }
        error = 0;
        if (vap->va_mode != (mode_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                        return (EROFS);
                error = hfs_chmod(vp, (int)vap->va_mode, cred, p);
        }
+       HFS_KNOTE(vp, NOTE_ATTRIB);
        return (error);
 }
 
@@ -652,6 +726,7 @@ hfs_setattr(ap)
  * Change the mode on a file.
  * cnode must be locked before calling.
  */
+__private_extern__
 int
 hfs_chmod(vp, mode, cred, p)
        register struct vnode *vp;
@@ -669,7 +744,7 @@ hfs_chmod(vp, mode, cred, p)
        if (VTOHFS(vp)->jnl && cp && cp->c_datafork) {
                struct HFSPlusExtentDescriptor *extd;
 
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
                        return EPERM;
                }
@@ -695,6 +770,7 @@ hfs_chmod(vp, mode, cred, p)
 }
 
 
+__private_extern__
 int
 hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags)
 {
@@ -712,9 +788,9 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
        case VDIR:
        case VLNK:
        case VREG:
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                        return (EROFS);
-        break;
+               break;
        default:
                break;
        }
@@ -747,6 +823,7 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
  * Change the flags on a file or directory.
  * cnode must be locked before calling.
  */
+__private_extern__
 int
 hfs_chflags(vp, flags, cred, p)
        register struct vnode *vp;
@@ -789,6 +866,7 @@ hfs_chflags(vp, flags, cred, p)
  * Perform chown operation on cnode cp;
  * code must be locked prior to call.
  */
+__private_extern__
 int
 hfs_chown(vp, uid, gid, cred, p)
        register struct vnode *vp;
@@ -934,14 +1012,13 @@ hfs_exchange(ap)
 {
        struct vnode *from_vp = ap->a_fvp;
        struct vnode *to_vp = ap->a_tvp;
-       struct vnode *from_rvp = NULL;
-       struct vnode *to_rvp = NULL;
        struct cnode *from_cp = VTOC(from_vp);
        struct cnode *to_cp = VTOC(to_vp);
        struct hfsmount *hfsmp = VTOHFS(from_vp);
        struct cat_desc tempdesc;
        struct cat_attr tempattr;
        int error = 0, started_tr = 0, grabbed_lock = 0;
+       cat_cookie_t cookie = {0};
 
        /* The files must be on the same volume. */
        if (from_vp->v_mount != to_vp->v_mount)
@@ -958,45 +1035,20 @@ hfs_exchange(ap)
                struct HFSPlusExtentDescriptor *extd;
 
                if (from_cp->c_datafork) {
-                       extd = &from_cp->c_datafork->ff_data.cf_extents[0];
+                       extd = &from_cp->c_datafork->ff_extents[0];
                        if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                                return EPERM;
                        }
                }
 
                if (to_cp->c_datafork) {
-                       extd = &to_cp->c_datafork->ff_data.cf_extents[0];
+                       extd = &to_cp->c_datafork->ff_extents[0];
                        if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                                return EPERM;
                        }
                }
        }
 
-       from_rvp = from_cp->c_rsrc_vp;
-       to_rvp = to_cp->c_rsrc_vp;
-
-       /* If one of the resource forks is open then get the other one. */
-       if (from_rvp || to_rvp) {
-               error = hfs_vgetrsrc(hfsmp, from_vp, &from_rvp, ap->a_p);
-               if (error)
-                       return (error);
-               error = hfs_vgetrsrc(hfsmp, to_vp, &to_rvp, ap->a_p);
-               if (error) {
-                       vrele(from_rvp);
-                       return (error);
-               } 
-       }
-
-       /* Ignore any errors, we are doing a 'best effort' on flushing */
-       if (from_vp)
-               (void) vinvalbuf(from_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_vp)
-               (void) vinvalbuf(to_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (from_rvp)
-               (void) vinvalbuf(from_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_rvp)
-               (void) vinvalbuf(to_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-
        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;
@@ -1007,6 +1059,13 @@ hfs_exchange(ap)
                started_tr = 1;
        }
        
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, ap->a_p))) {
+               goto Err_Exit;
+       }
+
        /* Lock catalog b-tree */
        error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, ap->a_p);
        if (error) goto Err_Exit;
@@ -1091,18 +1150,17 @@ hfs_exchange(ap)
                from_cp->c_flags &= ~UF_NODUMP;
                from_cp->c_flag |= C_CHANGE;
        }
-
        if ((to_cp->c_flags & UF_NODUMP) &&
            (to_cp->c_parentcnid != from_cp->c_parentcnid)) {
                to_cp->c_flags &= ~UF_NODUMP;
                to_cp->c_flag |= C_CHANGE;
        }
 
+       HFS_KNOTE(from_vp, NOTE_ATTRIB);
+       HFS_KNOTE(to_vp, NOTE_ATTRIB);
+
 Err_Exit:
-       if (to_rvp)
-               vrele(to_rvp);
-       if (from_rvp)
-               vrele(from_rvp);
+       cat_postflight(hfsmp, &cookie, ap->a_p);
 
        // XXXdbg
        if (started_tr) {
@@ -1161,12 +1219,7 @@ hfs_fsync(ap)
        if (vp->v_flag & VSYSTEM) {
            if (VTOF(vp)->fcbBTCBPtr != NULL) {
                        // XXXdbg
-                       if (hfsmp->jnl) {
-                               if (BTIsDirty(VTOF(vp))) {
-                                       panic("hfs: system file vp 0x%x has dirty blocks (jnl 0x%x)\n",
-                                                 vp, hfsmp->jnl);
-                               }
-                       } else {
+                       if (hfsmp->jnl == NULL) {
                                BTFlushPath(VTOF(vp));
                        }
            }
@@ -1311,6 +1364,18 @@ metasync:
                    !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) {
                        hfs_metasync(VTOHFS(vp), cp->c_hint, ap->a_p);
                }
+
+               // make sure that we've really been called from the user
+               // fsync() and if so push out any pending transactions 
+               // that this file might is a part of (and get them on
+               // stable storage).
+               if (vp->v_flag & VFULLFSYNC) {
+                   if (hfsmp->jnl) {
+                       journal_flush(hfsmp->jnl);
+                   } else {
+                       VOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NOCRED, ap->a_p);
+                   }
+               }
        }
 
        return (retval);
@@ -1443,13 +1508,25 @@ hfs_rmdir(ap)
                struct componentname *a_cnp;
        } */ *ap;
 {
-       struct vnode *vp = ap->a_vp;
-       struct vnode *dvp = ap->a_dvp;
-       struct proc *p = ap->a_cnp->cn_proc;
+       return (hfs_removedir(ap->a_dvp, ap->a_vp, ap->a_cnp, 0));
+}
+
+/*
+ * hfs_removedir
+ */
+static int
+hfs_removedir(dvp, vp, cnp, options)
+       struct vnode *dvp;
+       struct vnode *vp;
+       struct componentname *cnp;
+       int options;
+{
+       struct proc *p = cnp->cn_proc;
        struct cnode *cp;
        struct cnode *dcp;
        struct hfsmount * hfsmp;
        struct timeval tv;
+       cat_cookie_t cookie = {0};
        int error = 0, started_tr = 0, grabbed_lock = 0;
 
        cp = VTOC(vp);
@@ -1465,7 +1542,6 @@ hfs_rmdir(ap)
 #if QUOTA
        (void)hfs_getinoquota(cp);
 #endif
-
        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;
@@ -1476,6 +1552,15 @@ hfs_rmdir(ap)
                started_tr = 1;
        }
 
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+                       goto out;
+               }
+       }
+
        /*
         * Verify the directory is empty (and valid).
         * (Rmdir ".." won't be valid since
@@ -1520,16 +1605,22 @@ hfs_rmdir(ap)
        dcp->c_flag |= C_CHANGE | C_UPDATE;
        tv = time;
        (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
 
        hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID));
 
        cp->c_mode = 0;  /* Makes the vnode go away...see inactive */
        cp->c_flag |= C_NOEXISTS;
 out:
-       if (dvp) 
+       if (!(options & HFSRM_PARENT_LOCKED)) {
                vput(dvp);
+       }
+       HFS_KNOTE(vp, NOTE_DELETE);
        vput(vp);
 
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               cat_postflight(hfsmp, &cookie, p);
+       }
        // XXXdbg
        if (started_tr) { 
            journal_end_transaction(hfsmp->jnl);
@@ -1561,23 +1652,42 @@ hfs_remove(ap)
                struct componentname *a_cnp;
        } */ *ap;
 {
-       struct vnode *vp = ap->a_vp;
-       struct vnode *dvp = ap->a_dvp;
+       return (hfs_removefile(ap->a_dvp, ap->a_vp, ap->a_cnp, 0));
+}
+
+
+
+/*
+ * hfs_removefile
+ *
+ * Similar to hfs_remove except there are additional options.
+ */
+static int
+hfs_removefile(dvp, vp, cnp, options)
+       struct vnode *dvp;
+       struct vnode *vp;
+       struct componentname *cnp;
+       int options;
+{
        struct vnode *rvp = NULL;
        struct cnode *cp;
        struct cnode *dcp;
        struct hfsmount *hfsmp;
-       struct proc *p = current_proc();
+       struct proc *p = cnp->cn_proc;
        int dataforkbusy = 0;
        int rsrcforkbusy = 0;
        int truncated = 0;
        struct timeval tv;
+       cat_cookie_t cookie = {0};
        int error = 0;
        int started_tr = 0, grabbed_lock = 0;
+       int refcount, isbigfile = 0;
 
-       /* Redirect directories to rmdir */
-       if (vp->v_type == VDIR)
-               return (hfs_rmdir(ap));
+       /* Directories should call hfs_rmdir! */
+       if (vp->v_type == VDIR) {
+               error = EISDIR;
+               goto out;
+       }
 
        cp = VTOC(vp);
        dcp = VTOC(dvp);
@@ -1610,7 +1720,7 @@ hfs_remove(ap)
        if (hfsmp->jnl && cp->c_datafork) {
                struct HFSPlusExtentDescriptor *extd;
 
-               extd = &cp->c_datafork->ff_data.cf_extents[0];
+               extd = &cp->c_datafork->ff_extents[0];
                if (extd->startBlock == HFSTOVCB(hfsmp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
                        error = EPERM;
                        goto out;
@@ -1624,18 +1734,27 @@ hfs_remove(ap)
         * vnode (vp).  And we took a ref on the resource vnode (rvp).
         * Hence set 1 in the tookref parameter of ubc_isinuse().
         */
-       if (UBCISVALID(vp) && ubc_isinuse(vp, 1))
+       if (VTOC(vp)->c_flag & C_VPREFHELD) {
+           refcount = 2;
+       } else {
+           refcount = 1;
+       }
+       if (UBCISVALID(vp) && ubc_isinuse(vp, refcount))
                dataforkbusy = 1;
        if (rvp && UBCISVALID(rvp) && ubc_isinuse(rvp, 1))
                rsrcforkbusy = 1;
 
+       // need this to check if we have to break the deletion 
+       // into multiple pieces
+       isbigfile = (VTOC(vp)->c_datafork->ff_size >= HFS_BIGFILE_SIZE);
+
        /*
         * Carbon semantics prohibit deleting busy files.
         * (enforced when NODELETEBUSY is requested)
         */
        if ((dataforkbusy || rsrcforkbusy) &&
-           ((ap->a_cnp->cn_flags & NODELETEBUSY) ||
-            (hfsmp->hfs_private_metadata_dir == 0))) {
+           ((cnp->cn_flags & NODELETEBUSY) ||
+            (hfsmp->hfs_privdir_desc.cd_cnid == 0))) {
                error = EBUSY;
                goto out;
        }
@@ -1654,6 +1773,15 @@ hfs_remove(ap)
            started_tr = 1;
        }
 
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+                       goto out;
+               }
+       }
+
        /* Remove our entry from the namei cache. */
        cache_purge(vp);
 
@@ -1695,7 +1823,7 @@ hfs_remove(ap)
        if ((cp->c_flag & C_HARDLINK) == 0) {
                int mode = cp->c_mode;
 
-               if (!dataforkbusy && cp->c_datafork->ff_blocks != 0) {
+               if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) {
                        cp->c_mode = 0;  /* Suppress VOP_UPDATES */
                        error = VOP_TRUNCATE(vp, (off_t)0, IO_NDELAY, NOCRED, p);
                        cp->c_mode = mode;
@@ -1722,16 +1850,16 @@ hfs_remove(ap)
        if (cp->c_flag & C_HARDLINK) {
                struct cat_desc desc;
 
-               if ((ap->a_cnp->cn_flags & HASBUF) == 0 ||
-                   ap->a_cnp->cn_nameptr[0] == '\0') {
+               if ((cnp->cn_flags & HASBUF) == 0 ||
+                   cnp->cn_nameptr[0] == '\0') {
                        error = ENOENT; /* name missing! */
                        goto out;
                }
 
                /* Setup a descriptor for the link */
                bzero(&desc, sizeof(desc));
-               desc.cd_nameptr = ap->a_cnp->cn_nameptr;
-               desc.cd_namelen = ap->a_cnp->cn_namelen;
+               desc.cd_nameptr = cnp->cn_nameptr;
+               desc.cd_namelen = cnp->cn_namelen;
                desc.cd_parentcnid = dcp->c_cnid;
                /* XXX - if cnid is out of sync then the wrong thread rec will get deleted. */
                desc.cd_cnid = cp->c_cnid;
@@ -1760,7 +1888,7 @@ hfs_remove(ap)
                        bzero(&from_desc, sizeof(from_desc));
                        from_desc.cd_nameptr = inodename;
                        from_desc.cd_namelen = strlen(inodename);
-                       from_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+                       from_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                        from_desc.cd_flags = 0;
                        from_desc.cd_cnid = cp->c_fileid;
 
@@ -1768,7 +1896,7 @@ hfs_remove(ap)
                        bzero(&to_desc, sizeof(to_desc));
                        to_desc.cd_nameptr = delname;
                        to_desc.cd_namelen = strlen(delname);
-                       to_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+                       to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                        to_desc.cd_flags = 0;
                        to_desc.cd_cnid = cp->c_fileid;
        
@@ -1780,10 +1908,6 @@ hfs_remove(ap)
                /* Unlock the Catalog */
                (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
 
-               /* All done with component name... */
-               if ((ap->a_cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME))
-                       FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-
                if (error != 0)
                        goto out;
 
@@ -1793,7 +1917,7 @@ hfs_remove(ap)
 
                hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
 
-       } else if (dataforkbusy || rsrcforkbusy) {
+       } else if (dataforkbusy || rsrcforkbusy || isbigfile) {
                char delname[32];
                struct cat_desc to_desc;
                struct cat_desc todir_desc;
@@ -1808,7 +1932,7 @@ hfs_remove(ap)
                bzero(&to_desc, sizeof(to_desc));
                to_desc.cd_nameptr = delname;
                to_desc.cd_namelen = strlen(delname);
-               to_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+               to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
                to_desc.cd_flags = 0;
                to_desc.cd_cnid = cp->c_cnid;
 
@@ -1839,9 +1963,14 @@ hfs_remove(ap)
        } else /* Not busy */ {
 
                if (cp->c_blocks > 0) {
-                       printf("hfs_remove: attempting to delete a non-empty file!");
+#if 0
+                       panic("hfs_remove: attempting to delete a non-empty file!");
+#else
+                       printf("hfs_remove: attempting to delete a non-empty file %s\n",
+                               cp->c_desc.cd_nameptr);
                        error = EBUSY;
                        goto out;
+#endif
                }
 
                /* Lock catalog b-tree */
@@ -1852,10 +1981,10 @@ hfs_remove(ap)
                error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
 
                if (error && error != ENXIO && error != ENOENT && truncated) {
-                       if ((cp->c_datafork && cp->c_datafork->ff_data.cf_size != 0) ||
-                               (cp->c_rsrcfork && cp->c_rsrcfork->ff_data.cf_size != 0)) {
+                       if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
+                               (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
                                panic("hfs: remove: couldn't delete a truncated file! (%d, data sz %lld; rsrc sz %lld)",
-                                         error, cp->c_datafork->ff_data.cf_size, cp->c_rsrcfork->ff_data.cf_size);
+                                         error, cp->c_datafork->ff_size, cp->c_rsrcfork->ff_size);
                        } else {
                                printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
                                           cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error);
@@ -1871,6 +2000,7 @@ hfs_remove(ap)
 #endif /* QUOTA */
 
                cp->c_mode = 0;
+               truncated  = 0;    // because the catalog entry is gone
                cp->c_flag |= C_CHANGE | C_NOEXISTS;
                --cp->c_nlink;
                hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
@@ -1894,39 +2024,29 @@ hfs_remove(ap)
        dcp->c_flag |= C_CHANGE | C_UPDATE;
        tv = time;
        (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       HFS_KNOTE(dvp, NOTE_WRITE);
 
-       // XXXdbg
-       if (started_tr) {
-           journal_end_transaction(hfsmp->jnl);
-       }
-       if (grabbed_lock) {
-               hfs_global_shared_lock_release(hfsmp);
+out:
+       /* All done with component name... */
+       if ((options & HFSRM_SAVE_NAME) == 0 &&
+           (cnp != 0) &&
+           (cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME)) {
+               char *tmp = cnp->cn_pnbuf;
+               cnp->cn_pnbuf = NULL;
+               cnp->cn_flags &= ~HASBUF;
+               FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
        }
 
-       if (rvp)
-               vrele(rvp);
-       VOP_UNLOCK(vp, 0, p);
-       // XXXdbg - try to prevent the lost ubc_info panic
-       if ((cp->c_flag & C_HARDLINK) == 0 || cp->c_nlink == 0) {
-               (void) ubc_uncache(vp);
+       if (!(options & HFSRM_SKIP_RESERVE)) {
+               cat_postflight(hfsmp, &cookie, p);
        }
-       vrele(vp);
-       vput(dvp);
-
-       return (0);
 
-out:
-       if (rvp)
-               vrele(rvp);
-       
        /* Commit the truncation to the catalog record */
        if (truncated) {
-               cp->c_flag |= C_CHANGE | C_UPDATE;
-               tv = time;
-               (void) VOP_UPDATE(vp, &tv, &tv, 0);
+           cp->c_flag |= C_CHANGE | C_UPDATE | C_FORCEUPDATE;
+           tv = time;
+           (void) VOP_UPDATE(vp, &tv, &tv, 0);
        }
-       vput(vp);
-       vput(dvp);
 
        // XXXdbg
        if (started_tr) {
@@ -1936,6 +2056,26 @@ out:
                hfs_global_shared_lock_release(hfsmp);
        }
 
+       HFS_KNOTE(vp, NOTE_DELETE);
+       if (rvp) {
+               HFS_KNOTE(rvp, NOTE_DELETE);
+               vrele(rvp);
+       };
+
+       if (error) {
+               vput(vp);
+       } else {
+               VOP_UNLOCK(vp, 0, p);
+               // XXXdbg - try to prevent the lost ubc_info panic
+               if ((cp->c_flag & C_HARDLINK) == 0 || cp->c_nlink == 0) {
+                       (void) ubc_uncache(vp);
+               }
+               vrele(vp);
+       }
+       if (!(options & HFSRM_PARENT_LOCKED)) {
+               vput(dvp);
+       }
+
        return (error);
 }
 
@@ -1950,7 +2090,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
                cp->c_desc.cd_nameptr = 0;
                cp->c_desc.cd_namelen = 0;
                cp->c_desc.cd_flags &= ~CD_HASBUF;
-               FREE(name, M_TEMP);
+               remove_name(name);
        }
        bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc));
 
@@ -1963,19 +2103,11 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
 
 /*
 #
-#% rename      fdvp    U U U
-#% rename      fvp     U U U
-#% rename      tdvp    L U U
-#% rename      tvp     X U U
+#% rename      fdvp   U U U
+#% rename      fvp    U U U
+#% rename      tdvp   L U U
+#% rename      tvp    X U U
 #
-       vop_rename {
-               IN WILLRELE struct vnode *fdvp;
-               IN WILLRELE struct vnode *fvp;
-               IN struct componentname *fcnp;
-               IN WILLRELE struct vnode *tdvp;
-               IN WILLRELE struct vnode *tvp;
-               IN struct componentname *tcnp;
-       };
 */
 /*
  * Rename a cnode.
@@ -2014,53 +2146,114 @@ hfs_rename(ap)
        struct cat_desc from_desc;
        struct cat_desc to_desc;
        struct cat_desc out_desc;
-       struct hfsmount *hfsmp;
+       struct hfsmount *hfsmp = NULL;
        struct timeval tv;
-       int fdvp_locked, fvp_locked, tdvp_locked;
+       cat_cookie_t cookie = {0};
+       int fdvp_locked, fvp_locked, tdvp_locked, tvp_locked;
        int tvp_deleted;
        int started_tr = 0, grabbed_lock = 0;
        int error = 0;
 
-       hfsmp = VTOHFS(tdvp);
 
        /* Establish our vnode lock state. */
        tdvp_locked = 1;
+       tvp_locked = (tvp != 0);
        fdvp_locked = 0;
        fvp_locked = 0;
        tvp_deleted = 0;
 
+       /*
+        * Check for cross-device rename.
+        */
+       if ((fvp->v_mount != tdvp->v_mount) ||
+           (tvp && (fvp->v_mount != tvp->v_mount))) {
+               error = EXDEV;
+               goto out;
+       }
+
        /*
         * When fvp matches tvp they must be case variants
         * or hard links.
         *
-        * For the hardlink case there can be an extra ref on fvp.
+        * In some cases tvp will be locked in other cases
+        * it be unlocked with no reference.  Normalize the
+        * state here (unlocked with a reference) so that
+        * we can exit in a known state.
         */
        if (fvp == tvp) {
-               if (VOP_ISLOCKED(fvp) &&
-                       (VTOC(fvp)->c_lock.lk_lockholder == p->p_pid) &&
-                       (VTOC(fvp)->c_lock.lk_lockthread == current_thread())) {
-                       fvp_locked = 1;
-                       vrele(fvp);  /* drop the extra ref */
+               if (VOP_ISLOCKED(tvp) &&
+                   (VTOC(tvp)->c_lock.lk_lockholder == p->p_pid) &&
+                   (VTOC(tvp)->c_lock.lk_lockthread == current_thread())) {
+                       vput(tvp);
                }
                tvp = NULL;
+               tvp_locked = 0;
+
                /*
-                * If this a hard link and its not a case
-                * variant then keep tvp around for removal.
+                * If this a hard link with different parents
+                * and its not a case variant then keep tvp
+                * around for removal.
                 */
                if ((VTOC(fvp)->c_flag & C_HARDLINK) &&
                    ((fdvp != tdvp) ||
                     (hfs_namecmp(fcnp->cn_nameptr, fcnp->cn_namelen,
                                  tcnp->cn_nameptr, tcnp->cn_namelen) != 0))) {
                        tvp = fvp;
+                       vref(tvp);
                }
        }
         
        /*
-        * Check for cross-device rename.
+        * The following edge case is caught here:
+        * (to cannot be a descendent of from)
+        *
+        *       o fdvp
+        *      /
+        *     /
+        *    o fvp
+        *     \
+        *      \
+        *       o tdvp
+        *      /
+        *     /
+        *    o tvp
         */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
-               error = EXDEV;
+       if (tdcp->c_parentcnid == VTOC(fvp)->c_cnid) {
+               error = EINVAL;
+               goto out;
+       }
+
+       /*
+        * The following two edge cases are caught here:
+        * (note tvp is not empty)
+        *
+        *       o tdvp               o tdvp
+        *      /                    /
+        *     /                    /
+        *    o tvp            tvp o fdvp
+        *     \                    \
+        *      \                    \
+        *       o fdvp               o fvp
+        *      /
+        *     /
+        *    o fvp
+        */
+       if (tvp && (tvp->v_type == VDIR) && (VTOC(tvp)->c_entries != 0)) {
+               error = ENOTEMPTY;
+               goto out;
+       }
+
+       /*
+        * The following edge case is caught here:
+        * (the from child and parent are the same)
+        *
+        *          o tdvp
+        *         /
+        *        /
+        *  fdvp o fvp
+        */
+       if (fdvp == fvp) {
+               error = EINVAL;
                goto out;
        }
 
@@ -2073,16 +2266,7 @@ hfs_rename(ap)
                goto out;
        }
 
-       /*
-        * Be sure we are not renaming ".", "..", or an alias of ".".
-        */
-       if ((fvp->v_type == VDIR) &&
-           (((fcnp->cn_namelen == 1) && (fcnp->cn_nameptr[0] == '.')) ||
-             (fdvp == fvp) ||
-             (fcnp->cn_flags&ISDOTDOT))) {
-                       error = EINVAL;
-                       goto out;
-       }
+       hfsmp = VTOHFS(tdvp);
 
        /*
         * If the destination parent directory is "sticky", then the
@@ -2090,146 +2274,123 @@ hfs_rename(ap)
         * the rename, otherwise the destination may not be changed
         * (except by root). This implements append-only directories.
         *
-        * Note that checks for immutable, write access, and a non-empty
-        * target are done by the call to VOP_REMOVE.
+        * Note that checks for immutable and write access are done
+        * by the call to VOP_REMOVE.
         */
        if (tvp && (tdcp->c_mode & S_ISTXT) &&
            (tcnp->cn_cred->cr_uid != 0) &&
            (tcnp->cn_cred->cr_uid != tdcp->c_uid) &&
            (hfs_owner_rights(hfsmp, VTOC(tvp)->c_uid, tcnp->cn_cred, p, false)) ) {
-                       error = EPERM;
-                       goto out;
+               error = EPERM;
+               goto out;
        }
 
+#if QUOTA
+       if (tvp)
+               (void)hfs_getinoquota(VTOC(tvp));
+#endif
+
        /*
-        * All done with preflighting.
-        *
-        * We now break the call into two transactions:
-        * 1 - Remove the destionation (if any) using VOP_REMOVE,
-        * which in itself is a complete transaction.
-        *
-        * 2 - Rename source to destination.
-        * 
-        * Since all the preflighting is done, we assume that a
-        * rename failure is unlikely once part 1 is complete.
-        * Breaking rename into two transactions buys us a much
-        * simpler implementation with respect to the locking
-        * protocol.  There are only 3 vnodes to worry about
-        * locking in the correct order (instead of 4).
+        * Lock all the vnodes before starting a journal transaction.
         */
 
        /*
-        * Part 1 - If the destination exists then it needs to be removed.
+        * Simple case (same parent) - just lock child (fvp).
         */
-       if (tvp) {
-               /*
-                * VOP_REMOVE will vput tdvp so we better bump its
-                * ref count and relockit, always set tvp to NULL
-                * afterwards to indicate that we're done with it.
-                */
-               VREF(tdvp);
-
-               if (tvp == fvp) {
-                       if (fvp_locked) {
-                               VREF(fvp);
-                       } else {
-                               error = vget(fvp, LK_EXCLUSIVE | LK_RETRY, p);
-                               if (error)
-                                       goto out;
-                               fvp_locked = 1;
-                       }
-               } else {
-                       cache_purge(tvp);
-               }
-
-               /* Clear SAVENAME to keep VOP_REMOVE from smashing tcnp. */
-               tcnp->cn_flags &= ~SAVENAME;
-
-               if (tvp->v_type == VDIR)
-                       error = VOP_RMDIR(tdvp, tvp, tcnp);
-               else
-                       error = VOP_REMOVE(tdvp, tvp, tcnp);
-
-               /* Get lock states back in sync. */
-               tdvp_locked = 0;
-               if (tvp == fvp)
-                       fvp_locked = 0; 
-               tvp = NULL;     /* all done with tvp */
-               tvp_deleted = 1;
-
-               if (error)
-                       goto out;  /* couldn't remove destination! */
+       if (fdvp == tdvp) {
+               if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))
+                       goto out;
+               fvp_locked = 1;
+               goto vnlocked;
        }
-       /*
-        * All done with tvp.
-        *
-        * For POSIX compliance, if tvp was removed the only 
-        * error we can return from this point on is EIO.
-        */  
 
        /*
-        * Part 2 - rename source to destination
+        * If fdvp is the parent of tdvp then we'll need to
+        * drop tdvp's lock before acquiring a lock on fdvp.
+        *
+        *          fdvp
+        *           o 
+        *          / \
+        *         /   \
+        *   tdvp o     o fvp
+        *         \
+        *          \
+        *           o tvp
+        *
+        *
+        * If the parent directories are unrelated then we'll
+        * need to aquire their vnode locks in vnode address
+        * order.  Otherwise we can race with another rename
+        * call that involves the same vnodes except that to
+        * and from are switched and potentially deadlock.
+        * [ie rename("a/b", "c/d") vs rename("c/d", "a/b")]
+        *
+        * If its not either of the two above cases then we
+        * can safely lock fdvp and fvp.
         */
+       if ((VTOC(fdvp)->c_cnid == VTOC(tdvp)->c_parentcnid) ||
+           ((VTOC(tdvp)->c_cnid != VTOC(fdvp)->c_parentcnid) &&
+            (fdvp < tdvp))) {
 
-       /*
-        * Lock the vnodes before starting a journal transaction.
-        */
-       if (fdvp != tdvp) {
-               /*
-                * fvp is a child and must be locked last.
-                */
-               if (fvp_locked) {
-                       VOP_UNLOCK(fvp, 0, p);
-                       fvp_locked = 0;
+               /* Drop locks on tvp and tdvp */
+               if (tvp_locked) {
+                       VOP_UNLOCK(tvp, 0, p);
+                       tvp_locked = 0;
                }
+               VOP_UNLOCK(tdvp, 0, p);
+               tdvp_locked = 0;
+
+               /* Aquire locks in correct order */
+               if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               fdvp_locked = 1;
+               if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               tdvp_locked = 1;
+
                /*
-                *      If fdvp is the parent of tdvp then it needs to be locked first.
+                * Now that the parents are locked only one thread
+                * can continue. So the lock order of the children
+                * doesn't really matter
                 */
-               if ((VTOC(fdvp)->c_cnid == VTOC(tdvp)->c_parentcnid)) {
-                       if (tdvp_locked) {
-                               VOP_UNLOCK(tdvp, 0, p);
-                               tdvp_locked = 0;
-                       }
-                       if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       fdvp_locked = 1;
-                       if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+               if (tvp == fvp) {
+                       if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p)))
                                goto out;
-                       tdvp_locked = 1;
-
-               } else /* Lock tdvp then fdvp */ {
-                       if (!tdvp_locked) {
-                               if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       tvp_locked = 1;
+               } else {
+                       if (tvp) {
+                               if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p)))
                                        goto out;
-                               tdvp_locked = 1;
+                               tvp_locked = 1;
                        }
-                       if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)))
                                goto out;
-                       fdvp_locked = 1;
+                       fvp_locked = 1;
                }
-       } else if (!tdvp_locked) {
-                       /*
-                        * fvp is a child and must be locked last.
-                        */
-                       if (fvp_locked) {
-                               VOP_UNLOCK(fvp, 0, p);
-                               fvp_locked = 0;
-                       }
-                       if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p)))
-                               goto out;
-                       tdvp_locked = 1;
-       }
 
-       /* Now its safe to lock fvp */
-       if (!fvp_locked) {
+       } else /* OK to lock fdvp and fvp */ {
+               if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p)))
+                       goto out;
+               fdvp_locked = 1;
                if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))
                        goto out;
-               fvp_locked = 1;
+               if (tvp == fvp)
+                       tvp_locked = 1;
+               else
+                       fvp_locked = 1;
        }
 
+vnlocked:
        fdcp = VTOC(fdvp);
        fcp = VTOC(fvp);
 
+       /* 
+        * While fvp is still locked, purge it from the name cache and
+        * grab it's c_cnid value.  Note that the removal of tvp (below)
+        * can drop fvp's lock when fvp == tvp.
+        */
+       cache_purge(fvp);
+
        /*
         * When a file moves out of "Cleanup At Startup"
         * we can drop its NODUMP status.
@@ -2238,24 +2399,13 @@ hfs_rename(ap)
            (fvp->v_type == VREG) &&
            (fdvp != tdvp) &&
            (fdcp->c_desc.cd_nameptr != NULL) &&
-           (strcmp(fdcp->c_desc.cd_nameptr, "Cleanup At Startup") == 0)) {
+           (strcmp(fdcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) {
                fcp->c_flags &= ~UF_NODUMP;
                fcp->c_flag |= C_CHANGE;
                tv = time;
                (void) VOP_UPDATE(fvp, &tv, &tv, 0);
        }
 
-       hfs_global_shared_lock_acquire(hfsmp);
-       grabbed_lock = 1;
-       if (hfsmp->jnl) {
-           if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
-                       goto out;
-           }
-               started_tr = 1;
-       }
-
-       cache_purge(fvp);
-
        bzero(&from_desc, sizeof(from_desc));
        from_desc.cd_nameptr = fcnp->cn_nameptr;
        from_desc.cd_namelen = fcnp->cn_namelen;
@@ -2270,6 +2420,52 @@ hfs_rename(ap)
        to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
        to_desc.cd_cnid = fcp->c_cnid;
 
+       hfs_global_shared_lock_acquire(hfsmp);
+       grabbed_lock = 1;
+       if (hfsmp->jnl) {
+           if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
+                       goto out;
+           }
+               started_tr = 1;
+       }
+
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) {
+               goto out;
+       }
+
+       /*
+        * If the destination exists then it needs to be removed.
+        */
+
+       if (tvp) {
+               if (tvp != fvp)
+                       cache_purge(tvp);
+               /*
+                * Note that hfs_removedir and hfs_removefile
+                * will keep tdvp locked with a reference.
+                * But tvp will lose its lock and reference.
+                */
+               if (tvp->v_type == VDIR)
+                       error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS);
+               else
+                       error = hfs_removefile(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS);
+
+               if (tvp == fvp)
+                       fvp_locked = 0;
+               tvp = NULL;
+               tvp_locked = 0;
+               tvp_deleted = 1;
+               if (error)
+                       goto out;
+       }
+
+       /*
+        * All done with tvp and fvp
+        */
+
        /* Lock catalog b-tree */
        error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
        if (error)
@@ -2279,22 +2475,23 @@ hfs_rename(ap)
 
        /* Unlock catalog b-tree */
        (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-       if (error)
+
+       if (error) {
                goto out;
+       }
 
        /* Update cnode's catalog descriptor */
-       replace_desc(fcp, &out_desc);
+       if (fvp_locked) {
+               replace_desc(fcp, &out_desc);
+               fcp->c_parentcnid = tdcp->c_cnid;
+               fcp->c_hint = 0;
+       }
 
        hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_RMDIR : VOL_RMFILE,
                      (fdcp->c_cnid == kHFSRootFolderID));
        hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_MKDIR : VOL_MKFILE,
                      (tdcp->c_cnid == kHFSRootFolderID));
 
-       VOP_UNLOCK(fvp, 0, p);
-       fcp = NULL;
-       fvp_locked = 0;
-       /* All done with fvp. */  
-
        /* Update both parent directories. */
        tv = time;
        if (fdvp != tdvp) {
@@ -2312,6 +2509,9 @@ hfs_rename(ap)
        (void) VOP_UPDATE(tdvp, &tv, &tv, 0);
 
 out:
+       if (hfsmp) {
+               cat_postflight(hfsmp, &cookie, p);
+       }
        if (started_tr) {
            journal_end_transaction(hfsmp->jnl);
        }
@@ -2319,6 +2519,14 @@ out:
                hfs_global_shared_lock_release(hfsmp);
        }
 
+       /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have
+          generated a NOTE_WRITE for tdvp and a NOTE_DELETE for tvp.
+        */
+       if (error == 0) {
+               HFS_KNOTE(fvp, NOTE_RENAME);
+               HFS_KNOTE(fdvp, NOTE_WRITE);
+               if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE);
+       };
        if (fvp_locked) {
                VOP_UNLOCK(fvp, 0, p);
        }
@@ -2328,18 +2536,18 @@ out:
        if (tdvp_locked) {
                VOP_UNLOCK(tdvp, 0, p);
        }
-       if (tvp && (tvp != fvp)) {
-               if (tvp != tdvp)
-                       VOP_UNLOCK(tvp, 0, p);
-               vrele(tvp);
+       if (tvp_locked) {
+               VOP_UNLOCK(tvp, 0, p);
        }
 
        vrele(fvp);
        vrele(fdvp);
+       if (tvp)
+               vrele(tvp);
        vrele(tdvp);
 
        /* After tvp is removed the only acceptable error is EIO */
-       if ((error == ENOSPC) && tvp_deleted)
+       if (error && tvp_deleted)
                error = EIO;
 
        return (error);
@@ -2441,7 +2649,6 @@ hfs_symlink(ap)
        vp = *vpp;
        len = strlen(ap->a_target);
        fp = VTOF(vp);
-       fp->ff_clumpsize = VTOVCB(vp)->blockSize;
 
 #if QUOTA
        (void)hfs_getinoquota(VTOC(vp));
@@ -2570,6 +2777,10 @@ hfs_readdir(ap)
        int eofflag = 0;
        void *user_start = NULL;
        int   user_len;
+
+       int ncookies=0;
+       u_long *cookies=NULL;
+       u_long *cookiep=NULL;
  
        /* We assume it's all one big buffer... */
        if (uio->uio_iovcnt > 1 || uio->uio_resid < AVERAGE_HFSDIRENTRY_SIZE)
@@ -2602,7 +2813,6 @@ hfs_readdir(ap)
                }
        }
 
-
        /* Create the entries for . and .. */
        if (uio->uio_offset < sizeof(rootdots)) {
                caddr_t dep;
@@ -2627,10 +2837,58 @@ hfs_readdir(ap)
                        goto Exit;
        }
 
+       if (ap->a_ncookies != NULL) {
+               /*
+                * These cookies are handles that allow NFS to restart
+                * scanning through a directory.  If a directory is large
+                * enough, NFS will issue a successive readdir() with a
+                * uio->uio_offset that is equal to one of these cookies.
+                *
+                * The cookies that we generate are synthesized byte-offsets.
+                * The offset is where the dirent the dirent would be if the
+                * directory were an array of packed dirent structs.  It is
+                * synthetic because that's not how directories are stored in
+                * HFS but other code expects that the cookie is a byte offset.
+                *
+                * We have to pre-allocate the cookies because cat_getdirentries()
+                * is the only one that can properly synthesize the offsets (since
+                * it may have to skip over entries and only it knows the true
+                * virtual offset of any particular directory entry). So we allocate
+                * a cookie table here and pass it in to cat_getdirentries().
+                *
+                * Note that the handling of "." and ".." is mostly done here but
+                * cat_getdirentries() is aware of.
+                *
+                * Only the NFS server uses cookies so fortunately this code is
+                * not executed unless the NFS server is issuing the readdir
+                * request.
+                *
+                * Also note that the NFS server is the one responsible for
+                * free'ing the cookies even though we allocated them.  Ick.
+                *
+                * We allocate a reasonable number of entries for the size of
+                * the buffer that we're going to fill in.  cat_getdirentries()
+                * is smart enough to not overflow if there's more room in the
+                * buffer but not enough room in the cookie table.
+                */
+               if (uio->uio_segflg != UIO_SYSSPACE)
+                       panic("hfs_readdir: unexpected uio from NFS server");
+
+               ncookies = uio->uio_iov->iov_len / (AVERAGE_HFSDIRENTRY_SIZE/2);
+               MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
+
+               *ap->a_ncookies = ncookies;
+               *ap->a_cookies = cookies;
+       }
+
        /* If there are no children then we're done */  
        if (cp->c_entries == 0) {
                eofflag = 1;
                retval = 0;
+               if (cookies) {
+                   cookies[0] = 0;
+                   cookies[1] = sizeof(struct hfsdotentry);
+               }
                goto Exit;
        }
 
@@ -2638,7 +2896,7 @@ hfs_readdir(ap)
        retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p);
        if (retval) goto Exit;
 
-       retval = cat_getdirentries(hfsmp, &cp->c_desc, uio, &eofflag);
+       retval = cat_getdirentries(hfsmp, &cp->c_desc, cp->c_entries, uio, &eofflag, cookies, ncookies);
 
        /* Unlock catalog b-tree */
        (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
@@ -2654,38 +2912,6 @@ hfs_readdir(ap)
        }
        
        cp->c_flag |= C_ACCESS;
-                                                                                                                       /* Bake any cookies */
-       if (!retval && ap->a_ncookies != NULL) {
-               struct dirent* dpStart;
-               struct dirent* dpEnd;
-               struct dirent* dp;
-               int ncookies;
-               u_long *cookies;
-               u_long *cookiep;
-
-               /*
-                * Only the NFS server uses cookies, and it loads the
-                * directory block into system space, so we can just look at
-                * it directly.
-                */
-               if (uio->uio_segflg != UIO_SYSSPACE)
-                       panic("hfs_readdir: unexpected uio from NFS server");
-               dpStart = (struct dirent *)(uio->uio_iov->iov_base - (uio->uio_offset - off));
-               dpEnd = (struct dirent *) uio->uio_iov->iov_base;
-               for (dp = dpStart, ncookies = 0;
-                    dp < dpEnd && dp->d_reclen != 0;
-                    dp = (struct dirent *)((caddr_t)dp + dp->d_reclen))
-                       ncookies++;
-               MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
-               for (dp = dpStart, cookiep = cookies;
-                    dp < dpEnd;
-                    dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
-                       off += dp->d_reclen;
-                       *cookiep++ = (u_long) off;
-               }
-               *ap->a_ncookies = ncookies;
-               *ap->a_cookies = cookies;
-       }
 
 Exit:;
        if (hfsmp->jnl && user_start) {
@@ -2761,40 +2987,28 @@ hfs_readlink(ap)
                }
        }
        retval = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio);
-
-       return (retval);
-}
-
-
-/*
- * hfs abort op, called after namei() when a CREATE/DELETE isn't actually
- * done. If a buffer has been saved in anticipation of a CREATE, delete it.
-#% abortop     dvp     = = =
-#
- vop_abortop {
-     IN struct vnode *dvp;
-     IN struct componentname *cnp;
-
-     */
-
-/* ARGSUSED */
-
-static int
-hfs_abortop(ap)
-       struct vop_abortop_args /* {
-               struct vnode *a_dvp;
-               struct componentname *a_cnp;
-       } */ *ap;
-{
-       if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
-               FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-               ap->a_cnp->cn_flags &= ~HASBUF;
+#if 1
+       /*
+        * Keep track blocks read
+        */
+       if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (retval == 0)) {
+               
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+                       VTOF(vp)->ff_bytesread = fp->ff_size;
+               else
+                       VTOF(vp)->ff_bytesread += fp->ff_size;
+               
+       //      if (VTOF(vp)->ff_bytesread > fp->ff_size)
+       //              cp->c_flag |= C_ACCESS;
        }
-
-       return (0);
+#endif
+       return (retval);
 }
 
-
 /*
  * Lock an cnode. If its already locked, set the WANT bit and sleep.
 #% lock                vp      U L U
@@ -2816,9 +3030,6 @@ hfs_lock(ap)
        struct vnode *vp = ap->a_vp;
        struct cnode *cp = VTOC(vp);
 
-       if (cp == NULL)
-               panic("hfs_lock: cnode in vnode is null\n");
-
        return (lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p));
 }
 
@@ -2842,10 +3053,12 @@ hfs_unlock(ap)
 {
        struct vnode *vp = ap->a_vp;
        struct cnode *cp = VTOC(vp);
-
-       if (cp == NULL)
-               panic("hfs_unlock: cnode in vnode is null\n");
-
+#if 0
+       if (!lockstatus(&cp->c_lock)) {
+               printf("hfs_unlock: vnode %s wasn't locked!\n",
+                       cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
+       }
+#endif
        return (lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE,
                &vp->v_interlock, ap->a_p));
 }
@@ -2929,6 +3142,9 @@ hfs_pathconf(ap)
        case _PC_PATH_MAX:
                *ap->a_retval = PATH_MAX; /* 1024 */
                break;
+       case _PC_PIPE_BUF:
+               *ap->a_retval = PIPE_BUF;
+               break;
        case _PC_CHOWN_RESTRICTED:
                *ap->a_retval = 1;
                break;
@@ -2939,7 +3155,10 @@ hfs_pathconf(ap)
                *ap->a_retval = kHFSPlusMaxFileNameChars;
                break;
        case _PC_CASE_SENSITIVE:
-               *ap->a_retval = 0;
+               if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE)
+                       *ap->a_retval = 1;
+               else
+                       *ap->a_retval = 0;
                break;
        case _PC_CASE_PRESERVING:
                *ap->a_retval = 1;
@@ -3015,12 +3234,16 @@ hfs_advlock(ap)
                return (EINVAL);
        }
 
-       if (start < 0)
-               return (EINVAL);
        if (fl->l_len == 0)
                end = -1;
-       else
+       else if (fl->l_len > 0)
                end = start + fl->l_len - 1;
+       else { /* l_len is negative */
+               end = start - 1;
+               start += fl->l_len;
+       }
+       if (start < 0)
+               return (EINVAL);
 
        /*
         * Create the hfslockf structure
@@ -3098,14 +3321,14 @@ hfs_update(ap)
        hfsmp = VTOHFS(vp);
 
        /* XXX do we really want to clear the sytem cnode flags here???? */
-       if ((vp->v_flag & VSYSTEM) ||
-           (VTOVFS(vp)->mnt_flag & MNT_RDONLY) ||
+       if (((vp->v_flag & VSYSTEM) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))||
+           (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) ||
            (cp->c_mode == 0)) {
                cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
                return (0);
        }
 
-       updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
+       updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE);
 
        /* Nothing to update. */
        if (updateflag == 0) {
@@ -3117,23 +3340,15 @@ hfs_update(ap)
        }
        if (updateflag & C_ACCESS) {
                /*
-                * If only the access time is changing then defer
-                * updating it on-disk util later (in hfs_inactive).
-                * If it was recently updated then skip the update.
+                * When the access time is the only thing changing
+                * then make sure its sufficiently newer before
+                * committing it to disk.
                 */
-               if (updateflag == C_ACCESS) {
-                       cp->c_flag &= ~C_ACCESS;
-       
-                       /* Its going to disk or its sufficiently newer... */
-                       if ((cp->c_flag & C_ATIMEMOD) ||
-                           (ap->a_access->tv_sec > (cp->c_atime + ATIME_ACCURACY))) {
-                               cp->c_atime = ap->a_access->tv_sec;
-                               cp->c_flag |= C_ATIMEMOD;
-                       }
+               if ((updateflag == C_ACCESS) &&
+                   (ap->a_access->tv_sec < (cp->c_atime + ATIME_ONDISK_ACCURACY))) {
                        return (0);
-               } else {
-                       cp->c_atime = ap->a_access->tv_sec;
                }
+               cp->c_atime = ap->a_access->tv_sec;
        }
        if (updateflag & C_UPDATE) {
                cp->c_mtime = ap->a_modify->tv_sec;
@@ -3163,15 +3378,21 @@ hfs_update(ap)
         * gets written to disk.
         *
         * Deleted files can defer meta data updates until inactive.
+        *
+        * If we're ever called with the C_FORCEUPDATE flag though
+        * we have to do the update.
         */
-       if (ISSET(cp->c_flag, C_DELETED) ||
+       if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 &&
+           (ISSET(cp->c_flag, C_DELETED) || 
            (dataforkp && cp->c_datafork->ff_unallocblocks) ||
-           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks)) {
+           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) {
                if (updateflag & (C_CHANGE | C_UPDATE))
                        hfs_volupdate(hfsmp, VOL_UPDATE, 0);    
                cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
                cp->c_flag |= C_MODIFIED;
 
+               HFS_KNOTE(vp, NOTE_ATTRIB);
+
                return (0);
        }
 
@@ -3195,6 +3416,19 @@ hfs_update(ap)
                bcopy(dataforkp, &datafork, sizeof(datafork));
                datafork.cf_size = CIRCLEQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
                dataforkp = &datafork;
+       } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) {
+               // always make sure the block count and the size 
+               // of the file match the number of blocks actually
+               // allocated to the file on disk
+               bcopy(dataforkp, &datafork, sizeof(datafork));
+               // make sure that we don't assign a negative block count
+               if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
+                   panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+                         cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+               }
+               datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
+               datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               dataforkp = &datafork;
        }
 
        /*
@@ -3217,18 +3451,20 @@ hfs_update(ap)
         /* Unlock the Catalog b-tree file. */
        (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
 
-       if (updateflag & (C_CHANGE | C_UPDATE))
+       if (updateflag & (C_CHANGE | C_UPDATE | C_FORCEUPDATE))
                hfs_volupdate(hfsmp, VOL_UPDATE, 0);    
 
+       /* After the updates are finished, clear the flags */
+       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE);
+
        // XXXdbg
        if (hfsmp->jnl) {
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);
 
-       /* After the updates are finished, clear the flags */
-       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_ATIMEMOD);
-
+       HFS_KNOTE(vp, NOTE_ATTRIB);
+       
        return (error);
 }
 
@@ -3253,6 +3489,7 @@ hfs_makenode(mode, dvp, vpp, cnp)
        struct proc *p;
        struct cat_desc in_desc, out_desc;
        struct cat_attr attr;
+       cat_cookie_t cookie = {0};
        int error, started_tr = 0, grabbed_lock = 0;
        enum vtype vnodetype;
 
@@ -3339,6 +3576,17 @@ hfs_makenode(mode, dvp, vpp, cnp)
                started_tr = 1;
        }
 
+       /*
+        * Reserve some space in the Catalog file.
+        *
+        * (we also add CAT_DELETE since our getnewvnode
+        *  request can cause an hfs_inactive call to
+        *  delete an unlinked file)
+        */
+       if ((error = cat_preflight(hfsmp, CAT_CREATE | CAT_DELETE, &cookie, p))) {
+               goto exit;
+       }
+
        /* Lock catalog b-tree */
        error = hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_EXCLUSIVE, p);
        if (error)
@@ -3358,6 +3606,11 @@ hfs_makenode(mode, dvp, vpp, cnp)
        dcp->c_flag |= C_CHANGE | C_UPDATE;
        tv = time;
        (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       if (vnodetype == VDIR) {
+               HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
+       } else {
+               HFS_KNOTE(dvp, NOTE_WRITE);
+       };
 
        hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE,
                (dcp->c_cnid == kHFSRootFolderID));
@@ -3388,6 +3641,8 @@ hfs_makenode(mode, dvp, vpp, cnp)
        if (error)
                goto exit;
 
+       // XXXdbg
+       cache_enter(dvp, tvp, cnp);
 
 #if QUOTA
        cp = VTOC(tvp);
@@ -3398,16 +3653,15 @@ hfs_makenode(mode, dvp, vpp, cnp)
        */
        if ((error = hfs_getinoquota(cp)) ||
            (error = hfs_chkiq(cp, 1, cnp->cn_cred, FORCE))) {
-               if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
-                       FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-                       cnp->cn_flags &= ~HASBUF;
-               }
                if (tvp->v_type == VDIR)
                        VOP_RMDIR(dvp,tvp, cnp);
                else
                        VOP_REMOVE(dvp,tvp, cnp);
 
-               return (error);
+               // because VOP_RMDIR and VOP_REMOVE already
+               // have done the vput()
+               dvp = NULL;
+               goto exit;
        }
 #endif /* QUOTA */
 
@@ -3432,17 +3686,22 @@ hfs_makenode(mode, dvp, vpp, cnp)
 exit:
        cat_releasedesc(&out_desc);
 
-       if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
-               FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
+       cat_postflight(hfsmp, &cookie, p);
 
+       if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
+               char *tmp = cnp->cn_pnbuf;
+               cnp->cn_pnbuf = NULL;
+               cnp->cn_flags &= ~HASBUF;
+               FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
+       }
        /*
         * Check if a file is located in the "Cleanup At Startup"
         * directory.  If it is then tag it as NODUMP so that we
         * can be lazy about zero filling data holes.
         */
-       if ((error == 0) && (vnodetype == VREG) &&
+       if ((error == 0) && dvp && (vnodetype == VREG) &&
            (dcp->c_desc.cd_nameptr != NULL) &&
-           (strcmp(dcp->c_desc.cd_nameptr, "Cleanup At Startup") == 0)) {
+           (strcmp(dcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) {
                struct vnode *ddvp;
                cnid_t parid;
 
@@ -3463,11 +3722,9 @@ exit:
                        vput(ddvp);
                }
        }
-
        if (dvp)
                vput(dvp);
 
-       // XXXdbg
        if (started_tr) {
            journal_end_transaction(hfsmp->jnl);
                started_tr = 0;
@@ -3527,6 +3784,158 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, stru
 }
 
 
+static void
+filt_hfsdetach(struct knote *kn)
+{
+       struct vnode *vp;
+       int result;
+       struct proc *p = current_proc();
+       
+       vp = (struct vnode *)kn->kn_hook;
+       if (1) {        /* ! KNDETACH_VNLOCKED */
+               result = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+               if (result) return;
+       };
+
+       result = KNOTE_DETACH(&VTOC(vp)->c_knotes, kn);
+       
+       if (1) {        /* ! KNDETACH_VNLOCKED */
+               VOP_UNLOCK(vp, 0, p);
+       };
+}
+
+/*ARGSUSED*/
+static int
+filt_hfsread(struct knote *kn, long hint)
+{
+       struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
+
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);
+       }
+
+       kn->kn_data = VTOF(vp)->ff_size - kn->kn_fp->f_offset;
+       return (kn->kn_data != 0);
+}
+
+/*ARGSUSED*/
+static int
+filt_hfswrite(struct knote *kn, long hint)
+{
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+       }
+       
+       kn->kn_data = 0;
+       return (1);
+}
+
+static int
+filt_hfsvnode(struct knote *kn, long hint)
+{
+
+       if (kn->kn_sfflags & hint)
+               kn->kn_fflags |= hint;
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= EV_EOF;
+               return (1);
+       }
+       return (kn->kn_fflags != 0);
+}
+
+static struct filterops hfsread_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfsread };
+static struct filterops hfswrite_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfswrite };
+static struct filterops hfsvnode_filtops = 
+       { 1, NULL, filt_hfsdetach, filt_hfsvnode };
+
+/*
+ #
+ #% kqfilt_add vp      L L L
+ #
+ vop_kqfilt_add
+       IN struct vnode *vp;
+       IN struct knote *kn;
+       IN struct proc *p;
+ */
+static int
+hfs_kqfilt_add(ap)
+       struct vop_kqfilt_add_args /* {
+               struct vnode *a_vp;
+               struct knote *a_kn;
+               struct proc *p;
+       } */ *ap;
+{
+       struct vnode *vp = ap->a_vp;
+       struct knote *kn = ap->a_kn;
+
+       switch (kn->kn_filter) {
+       case EVFILT_READ:
+               if (vp->v_type == VREG) {
+                       kn->kn_fop = &hfsread_filtops;
+               } else {
+                       return EINVAL;
+               };
+               break;
+       case EVFILT_WRITE:
+               if (vp->v_type == VREG) {
+                       kn->kn_fop = &hfswrite_filtops;
+               } else {
+                       return EINVAL;
+               };
+               break;
+       case EVFILT_VNODE:
+               kn->kn_fop = &hfsvnode_filtops;
+               break;
+       default:
+               return (1);
+       }
+
+       kn->kn_hook = (caddr_t)vp;
+
+       /* simple_lock(&vp->v_pollinfo.vpi_lock); */
+       KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn);
+       /* simple_unlock(&vp->v_pollinfo.vpi_lock); */
+
+       return (0);
+}
+
+/*
+ #
+ #% kqfilt_remove      vp      L L L
+ #
+ vop_kqfilt_remove
+       IN struct vnode *vp;
+       IN uintptr_t ident;
+       IN struct proc *p;
+ */
+static int
+hfs_kqfilt_remove(ap)
+       struct vop_kqfilt_remove_args /* {
+               struct vnode *a_vp;
+               uintptr_t ident;
+               struct proc *p;
+       } */ *ap;
+{
+       struct vnode *vp = ap->a_vp;
+       uintptr_t ident = ap->a_ident;
+       int result;
+
+       result = ENOTSUP; /* XXX */
+       
+       return (result);
+}
+
 /*
  * Wrapper for special device reads
  */
@@ -3656,6 +4065,43 @@ hfsfifo_close(ap)
        simple_unlock(&vp->v_interlock);
        return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
 }
+
+/*
+ * kqfilt_add wrapper for fifos.
+ *
+ * Fall through to hfs kqfilt_add routines if needed 
+ */
+int
+hfsfifo_kqfilt_add(ap)
+       struct vop_kqfilt_add_args *ap;
+{
+       extern int (**fifo_vnodeop_p)(void *);
+       int error;
+
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_add), ap);
+       if (error)
+               error = hfs_kqfilt_add(ap);
+       return (error);
+}
+
+/*
+ * kqfilt_remove wrapper for fifos.
+ *
+ * Fall through to hfs kqfilt_remove routines if needed 
+ */
+int
+hfsfifo_kqfilt_remove(ap)
+       struct vop_kqfilt_remove_args *ap;
+{
+       extern int (**fifo_vnodeop_p)(void *);
+       int error;
+
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_remove), ap);
+       if (error)
+               error = hfs_kqfilt_remove(ap);
+       return (error);
+}
+
 #endif /* FIFO */
 
 
@@ -3706,6 +4152,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vop_write_desc, (VOPFUNC)hfs_write },                   /* write */
     { &vop_ioctl_desc, (VOPFUNC)hfs_ioctl },                   /* ioctl */
     { &vop_select_desc, (VOPFUNC)hfs_select },                 /* select */
+    { &vop_revoke_desc, (VOPFUNC)nop_revoke },                 /* revoke */
     { &vop_exchange_desc, (VOPFUNC)hfs_exchange },             /* exchange */
     { &vop_mmap_desc, (VOPFUNC)err_mmap },                     /* mmap */
     { &vop_fsync_desc, (VOPFUNC)hfs_fsync },                   /* fsync */
@@ -3722,7 +4169,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vop_readdir_desc, (VOPFUNC)hfs_readdir },               /* readdir */
     { &vop_readdirattr_desc, (VOPFUNC)hfs_readdirattr },  /* readdirattr */
     { &vop_readlink_desc, (VOPFUNC)hfs_readlink },             /* readlink */
-    { &vop_abortop_desc, (VOPFUNC)hfs_abortop },               /* abortop */
+    { &vop_abortop_desc, (VOPFUNC)nop_abortop },               /* abortop */
     { &vop_inactive_desc, (VOPFUNC)hfs_inactive },             /* inactive */
     { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim },               /* reclaim */
     { &vop_lock_desc, (VOPFUNC)hfs_lock },                     /* lock */
@@ -3745,6 +4192,8 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },             /* blktooff */
     { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },             /* offtoblk */
     { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                     /* cmap */
+       { &vop_kqfilt_add_desc, (VOPFUNC)hfs_kqfilt_add },              /* kqfilt_add */
+       { &vop_kqfilt_remove_desc, (VOPFUNC)hfs_kqfilt_remove },                /* kqfilt_remove */
     { NULL, (VOPFUNC)NULL }
 };
 
@@ -3776,6 +4225,7 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = {
        { &vop_rename_desc, (VOPFUNC)spec_rename },             /* rename */
        { &vop_mkdir_desc, (VOPFUNC)spec_mkdir },               /* mkdir */
        { &vop_rmdir_desc, (VOPFUNC)spec_rmdir },               /* rmdir */
+       { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist },
        { &vop_symlink_desc, (VOPFUNC)spec_symlink },           /* symlink */
        { &vop_readdir_desc, (VOPFUNC)spec_readdir },           /* readdir */
        { &vop_readlink_desc, (VOPFUNC)spec_readlink },         /* readlink */
@@ -3834,6 +4284,7 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = {
        { &vop_rename_desc, (VOPFUNC)fifo_rename },             /* rename */
        { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir },               /* mkdir */
        { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir },               /* rmdir */
+       { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist },
        { &vop_symlink_desc, (VOPFUNC)fifo_symlink },           /* symlink */
        { &vop_readdir_desc, (VOPFUNC)fifo_readdir },           /* readdir */
        { &vop_readlink_desc, (VOPFUNC)fifo_readlink },         /* readlink */
@@ -3861,6 +4312,8 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = {
        { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },          /* blktooff */
        { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },          /* offtoblk */
        { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                  /* cmap */
+       { &vop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add },  /* kqfilt_add */
+       { &vop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove },  /* kqfilt_remove */
        { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL }
 };
 struct vnodeopv_desc hfs_fifoop_opv_desc =