xnu-1228.7.58.tar.gz

[apple/xnu.git] / bsd / hfs / hfs_vnops.c
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c

index 19006da0ea82e25b227cd4441b311913df24e88a..eef6b5e9660486b10484d49bd60c81c25a9bcf21 100644 (file)
--- a/bsd/hfs/hfs_vnops.c
+++ b/bsd/hfs/hfs_vnops.c
@@ -1,37 +1,50 @@
  /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
   *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
   * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
   * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  
  #include <sys/systm.h>
  #include <sys/kernel.h>
-#include <sys/file.h>
+#include <sys/param.h>
+#include <sys/file_internal.h>
  #include <sys/dirent.h>
  #include <sys/stat.h>
  #include <sys/buf.h>
  #include <sys/mount.h>
-#include <sys/vnode.h>
+#include <sys/vnode_if.h>
+#include <sys/vnode_internal.h>
  #include <sys/malloc.h>
-#include <sys/namei.h>
  #include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+#include <sys/paths.h>
  #include <sys/quota.h>
+#include <sys/time.h>
+#include <sys/disk.h>
+#include <sys/kauth.h>
+#include <sys/uio_internal.h>
  
  #include <miscfs/specfs/specdev.h>
  #include <miscfs/fifofs/fifo.h>
@@ -39,11 +52,11 @@
  #include <machine/spl.h>
  
  #include <sys/kdebug.h>
+#include <sys/sysctl.h>
  
  #include "hfs.h"
  #include "hfs_catalog.h"
  #include "hfs_cnode.h"
-#include "hfs_lockf.h"
  #include "hfs_dbg.h"
  #include "hfs_mount.h"
  #include "hfs_quota.h"
@@ -52,33 +65,68 @@
  #include "hfscommon/headers/BTreesInternal.h"
  #include "hfscommon/headers/FileMgrInternal.h"
  
-#define MAKE_DELETED_NAME(NAME,FID) \
-           (void) sprintf((NAME), "%s%d", HFS_DELETE_PREFIX, (FID))
  
+#define KNDETACH_VNLOCKED 0x00000001
+
+#define CARBON_TEMP_DIR_NAME   "Cleanup At Startup"
  
-extern uid_t console_user;
  
  /* Global vfs data structures for hfs */
  
+/* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */
+int always_do_fullfsync = 0;
+SYSCTL_INT (_kern, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
+
+static int hfs_makenode(struct vnode *dvp, struct vnode **vpp,
+                        struct componentname *cnp, struct vnode_attr *vap,
+                        vfs_context_t ctx);
+
+static int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p);
+static int hfs_metasync_all(struct hfsmount *hfsmp);
+
+static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
+                         int);
+
+static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
+                          int, int, int);
  
-extern int groupmember(gid_t gid, struct ucred *cred);
+#if FIFO
+static int hfsfifo_read(struct vnop_read_args *);
+static int hfsfifo_write(struct vnop_write_args *);
+static int hfsfifo_close(struct vnop_close_args *);
+static int hfsfifo_kqfilt_add(struct vnop_kqfilt_add_args *);
+static int hfsfifo_kqfilt_remove(struct vnop_kqfilt_remove_args *);
+
+extern int (**fifo_vnodeop_p)(void *);
+#endif /* FIFO */
  
-static int hfs_makenode(int mode, struct vnode *dvp, struct vnode **vpp,
-                        struct componentname *cnp);
-                        
-static int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
-                       struct vnode **rvpp, struct proc *p);
+static int hfs_vnop_close(struct vnop_close_args*);
+static int hfs_vnop_create(struct vnop_create_args*);
+static int hfs_vnop_exchange(struct vnop_exchange_args*);
+static int hfs_vnop_fsync(struct vnop_fsync_args*);
+static int hfs_vnop_mkdir(struct vnop_mkdir_args*);
+static int hfs_vnop_mknod(struct vnop_mknod_args*);
+static int hfs_vnop_getattr(struct vnop_getattr_args*);
+static int hfs_vnop_open(struct vnop_open_args*);
+static int hfs_vnop_readdir(struct vnop_readdir_args*);
+static int hfs_vnop_remove(struct vnop_remove_args*);
+static int hfs_vnop_rename(struct vnop_rename_args*);
+static int hfs_vnop_rmdir(struct vnop_rmdir_args*);
+static int hfs_vnop_symlink(struct vnop_symlink_args*);
+static int hfs_vnop_setattr(struct vnop_setattr_args*);
+static int hfs_vnop_readlink(struct vnop_readlink_args *);
+static int hfs_vnop_pathconf(struct vnop_pathconf_args *);
+static int hfs_vnop_kqfiltremove(struct vnop_kqfilt_remove_args *);
+static int hfs_vnop_whiteout(struct vnop_whiteout_args *);
+static int hfsspec_read(struct vnop_read_args *);
+static int hfsspec_write(struct vnop_write_args *);
+static int hfsspec_close(struct vnop_close_args *);
+
+/* Options for hfs_removedir and hfs_removefile */
+#define HFSRM_SKIP_RESERVE  0x01
  
-static int hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p);
  
-int hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags);
  
-int hfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
-                       struct proc *p);
-int hfs_chmod(struct vnode *vp, int mode, struct ucred *cred,
-                       struct proc *p);
-int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid,
-                       struct ucred *cred, struct proc *p);
  
  /*****************************************************************************
  *
@@ -87,392 +135,451 @@ int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid,
  *****************************************************************************/
  
  /*
- * Create a regular file
-#% create      dvp     L U U
-#% create      vpp     - L -
-#
- vop_create {
-     IN WILLRELE struct vnode *dvp;
-     OUT struct vnode **vpp;
-     IN struct componentname *cnp;
-     IN struct vattr *vap;
-       
-     We are responsible for freeing the namei buffer,
-        it is done in hfs_makenode()
-*/
-
+ * Create a regular file.
+ */
  static int
-hfs_create(ap)
-       struct vop_create_args /* {
-               struct vnode *a_dvp;
-               struct vnode **a_vpp;
-               struct componentname *a_cnp;
-               struct vattr *a_vap;
-       } */ *ap;
+hfs_vnop_create(struct vnop_create_args *ap)
  {
-       struct vattr *vap = ap->a_vap;
+       int error;
  
-       return (hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode),
-                               ap->a_dvp, ap->a_vpp, ap->a_cnp));
-}
+again:
+       error = hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context);
  
+       /*
+        * We speculatively skipped the original lookup of the leaf
+        * for CREATE.  Since it exists, go get it as long as they
+        * didn't want an exclusive create.
+        */
+       if ((error == EEXIST) && !(ap->a_vap->va_vaflags & VA_EXCLUSIVE)) {
+               struct vnop_lookup_args args;
+
+               args.a_desc = &vnop_lookup_desc;
+               args.a_dvp = ap->a_dvp;
+               args.a_vpp = ap->a_vpp;
+               args.a_cnp = ap->a_cnp;
+               args.a_context = ap->a_context;
+               args.a_cnp->cn_nameiop = LOOKUP;
+               error = hfs_vnop_lookup(&args);
+               /*
+                * We can also race with remove for this file.
+                */
+               if (error == ENOENT) {
+                       goto again;
+               }
  
-/*
- * Mknod vnode call
-
-#% mknod       dvp     L U U
-#% mknod       vpp     - X -
-#
- vop_mknod {
-     IN WILLRELE struct vnode *dvp;
-     OUT WILLRELE struct vnode **vpp;
-     IN struct componentname *cnp;
-     IN struct vattr *vap;
-     */
-/* ARGSUSED */
+               /* Make sure it was a regular file. */
+               if ((error == 0) && !vnode_isreg(*args.a_vpp)) {
+                       vnode_put(*args.a_vpp);
+                       error = EEXIST;
+               }
+               args.a_cnp->cn_nameiop = CREATE;
+       }
+       return (error);
+}
  
+/*
+ * Make device special file.
+ */
  static int
-hfs_mknod(ap)
-       struct vop_mknod_args /* {
-               struct vnode *a_dvp;
-               struct vnode **a_vpp;
-               struct componentname *a_cnp;
-               struct vattr *a_vap;
-       } */ *ap;
+hfs_vnop_mknod(struct vnop_mknod_args *ap)
  {
-       struct vattr *vap = ap->a_vap;
+       struct vnode_attr *vap = ap->a_vap;
+       struct vnode *dvp = ap->a_dvp;
         struct vnode **vpp = ap->a_vpp;
         struct cnode *cp;
         int error;
  
-       if (VTOVCB(ap->a_dvp)->vcbSigWord != kHFSPlusSigWord) {
-               VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
-               vput(ap->a_dvp);
-               return (EOPNOTSUPP);
+       if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) {
+               return (ENOTSUP);
         }
  
         /* Create the vnode */
-       error = hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode),
-                            ap->a_dvp, vpp, ap->a_cnp);
+       error = hfs_makenode(dvp, vpp, ap->a_cnp, vap, ap->a_context);
         if (error)
                 return (error);
+
         cp = VTOC(*vpp);
-       cp->c_flag |= C_ACCESS | C_CHANGE | C_UPDATE;
+       cp->c_touch_acctime = TRUE;
+       cp->c_touch_chgtime = TRUE;
+       cp->c_touch_modtime = TRUE;
+
         if ((vap->va_rdev != VNOVAL) &&
             (vap->va_type == VBLK || vap->va_type == VCHR))
                 cp->c_rdev = vap->va_rdev;
-       /*
-        * Remove cnode so that it will be reloaded by lookup and
-        * checked to see if it is an alias of an existing vnode.
-        * Note: unlike UFS, we don't bash v_type here.
-        */
-       vput(*vpp);
-       vgone(*vpp);
-       *vpp = 0;
+
         return (0);
  }
  
-
  /*
- * Open called.
-#% open                vp      L L L
-#
- vop_open {
-     IN struct vnode *vp;
-     IN int mode;
-     IN struct ucred *cred;
-     IN struct proc *p;
-     */
-
-
+ * Open a file/directory.
+ */
  static int
-hfs_open(ap)
-       struct vop_open_args /* {
-               struct vnode *a_vp;
-               int  a_mode;
-               struct ucred *a_cred;
-               struct proc *a_p;
-       } */ *ap;
+hfs_vnop_open(struct vnop_open_args *ap)
  {
         struct vnode *vp = ap->a_vp;
+       struct filefork *fp;
+       struct timeval tv;
+       int error;
  
         /*
          * Files marked append-only must be opened for appending.
          */
-       if ((vp->v_type != VDIR) && (VTOC(vp)->c_flags & APPEND) &&
+       if ((VTOC(vp)->c_flags & APPEND) && !vnode_isdir(vp) &&
             (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
                 return (EPERM);
  
+       if (vnode_isreg(vp) && !UBCINFOEXISTS(vp))
+               return (EBUSY);  /* file is in use by the kernel */
+
+       /* Don't allow journal file to be opened externally. */
+       if (VTOC(vp)->c_fileid == VTOHFS(vp)->hfs_jnlfileid)
+               return (EPERM);
+       /*
+        * On the first (non-busy) open of a fragmented
+        * file attempt to de-frag it (if it's less than 20MB).
+        */
+       if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) ||
+           (VTOHFS(vp)->jnl == NULL) ||
+#if NAMEDSTREAMS
+           !vnode_isreg(vp) || vnode_isinuse(vp, 0) || vnode_isnamedstream(vp)) {
+#else
+           !vnode_isreg(vp) || vnode_isinuse(vp, 0)) {
+#endif
+               return (0);
+       }
+
+       if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
+               return (error);
+       fp = VTOF(vp);
+       if (fp->ff_blocks &&
+           fp->ff_extents[7].blockCount != 0 &&
+           fp->ff_size <= (20 * 1024 * 1024)) {
+               struct timeval now;
+               struct cnode *cp = VTOC(vp);
+               /* 
+                * Wait until system bootup is done (3 min).
+                * And don't relocate a file that's been modified
+                * within the past minute -- this can lead to
+                * system thrashing.
+                */
+               microuptime(&tv);
+               microtime(&now);
+               if (tv.tv_sec > (60 * 3) &&
+                  ((now.tv_sec - cp->c_mtime) > 60)) {
+                       (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096,
+                                           vfs_context_ucred(ap->a_context),
+                                           vfs_context_proc(ap->a_context));
+               }
+       }
+       hfs_unlock(VTOC(vp));
+
         return (0);
  }
  
-/*
- * Close called.
- *
- * Update the times on the cnode.
-#% close       vp      U U U
-#
- vop_close {
-     IN struct vnode *vp;
-     IN int fflag;
-     IN struct ucred *cred;
-     IN struct proc *p;
-     */
-
  
+/*
+ * Close a file/directory.
+ */
  static int
-hfs_close(ap)
-       struct vop_close_args /* {
+hfs_vnop_close(ap)
+       struct vnop_close_args /* {
                 struct vnode *a_vp;
                 int a_fflag;
-               struct ucred *a_cred;
-               struct proc *a_p;
+               vfs_context_t a_context;
         } */ *ap;
  {
         register struct vnode *vp = ap->a_vp;
-       register struct cnode *cp = VTOC(vp);
-       register struct filefork *fp = VTOF(vp);
-       struct proc *p = ap->a_p;
-       struct timeval tv;
-       off_t leof;
-       u_long blks, blocksize;
-       int devBlockSize;
-       int error;
+       register struct cnode *cp;
+       struct proc *p = vfs_context_proc(ap->a_context);
+       struct hfsmount *hfsmp;
+       int busy;
+       int knownrefs = 0;
+       int tooktrunclock = 0;
  
-       simple_lock(&vp->v_interlock);
-       if ((!UBCISVALID(vp) && vp->v_usecount > 1)
-           || (UBCISVALID(vp) && ubc_isinuse(vp, 1))) {
-               tv = time;
-               CTIMES(cp, &tv, &tv);
-       }
-       simple_unlock(&vp->v_interlock);
+       if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0)
+               return (0);
+       cp = VTOC(vp);
+       hfsmp = VTOHFS(vp);
  
         /*
-        * VOP_CLOSE can be called with vp locked (from vclean).
-        * We check for this case using VOP_ISLOCKED and bail.
-        * 
-        * XXX During a force unmount we won't do the cleanup below!
+        * If the rsrc fork is a named stream, it holds a usecount on 
+        * the data fork, which prevents the data fork from getting recycled, which
+        * then prevents the de-allocation of its extra blocks.  
+        * Do checks for truncation on close. Purge extra extents if they
+        * exist.  Make sure the vp is not a directory, that it has a resource
+        * fork, and that rsrc fork is a named stream.
          */
-       if (vp->v_type == VDIR || VOP_ISLOCKED(vp))
-               return (0);
-
-       leof = fp->ff_size;
         
-       if ((fp->ff_blocks > 0) && !ISSET(cp->c_flag, C_DELETED)) {
-               enum vtype our_type = vp->v_type;
-               u_long our_id = vp->v_id;
-               int was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA);
-
-               error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-               if (error)
-                       return (0);
-               /*
-                * Since we can context switch in vn_lock our vnode
-                * could get recycled (eg umount -f).  Double check
-                * that its still ours.
-                */
-               if (vp->v_type != our_type || vp->v_id != our_id
-                   || cp != VTOC(vp) || !UBCINFOEXISTS(vp)) {
-                       VOP_UNLOCK(vp, 0, p);
-                       return (0);
-               }
+       if ((vp->v_type == VREG) && (cp->c_rsrc_vp)
+                       && (vnode_isnamedstream(cp->c_rsrc_vp))) {
+               uint32_t blks;
  
+               blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize);
                 /*
-                * Last chance to explicitly zero out the areas
-                * that are currently marked invalid:
+                *  If there are any extra blocks and there are only 2 refs on 
+                *  this vp (ourselves + rsrc fork holding ref on us), go ahead
+                *  and try to truncate the extra blocks away.
                  */
-               VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
-               (void) cluster_push(vp);
-               SET(vp->v_flag, VNOCACHE_DATA); /* Don't cache zeros */
-               while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) {
-                       struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges);
-                       off_t start = invalid_range->rl_start;
-                       off_t end = invalid_range->rl_end;
-               
-                       /* The range about to be written must be validated
-                        * first, so that VOP_CMAP() will return the
-                        * appropriate mapping for the cluster code:
-                        */
-                       rl_remove(start, end, &fp->ff_invalidranges);
+               if ((blks < VTOF(vp)->ff_blocks) && (!vnode_isinuse(vp, 2))) {
+                       // release cnode lock ; must acquire truncate lock BEFORE cnode lock
+                       hfs_unlock (cp);
+
+                       hfs_lock_truncate(cp, TRUE);
+                       tooktrunclock = 1;
+                       
+                       if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
+                               hfs_unlock_truncate(cp, TRUE);
+                               return (0);                     
+                       }
  
-                       (void) cluster_write(vp, (struct uio *) 0, leof,
-                                       invalid_range->rl_end + 1, invalid_range->rl_start,
-                                       (off_t)0, devBlockSize, IO_HEADZEROFILL | IO_NOZERODIRTY);
+                       //now re-test to make sure it's still valid.
+                       if (cp->c_rsrc_vp) {
+                               knownrefs = 1 + vnode_isnamedstream(cp->c_rsrc_vp);
+                               if (!vnode_isinuse(vp, knownrefs)) {
+                                       blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize);
+                                       if (blks < VTOF(vp)->ff_blocks) {
+                                               (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, ap->a_context);
+                                       }
+                               }
+                       }
+               }
+       }
  
-                       if (ISSET(vp->v_flag, VHASDIRTY))
-                               (void) cluster_push(vp);
+       // if we froze the fs and we're exiting, then "thaw" the fs 
+       if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
+           hfsmp->hfs_freezing_proc = NULL;
+           hfs_global_exclusive_lock_release(hfsmp);
+           lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+       }
  
-                       cp->c_flag |= C_MODIFIED;
-               }
-               cp->c_flag &= ~C_ZFWANTSYNC;
-               cp->c_zftimeout = 0;
-               blocksize = VTOVCB(vp)->blockSize;
-               blks = leof / blocksize;
-               if (((off_t)blks * (off_t)blocksize) != leof)
-                       blks++;
-               /*
-                * Shrink the peof to the smallest size neccessary to contain the leof.
-                */
-               if (blks < fp->ff_blocks)
-                       (void) VOP_TRUNCATE(vp, leof, IO_NDELAY, ap->a_cred, p);
-               (void) cluster_push(vp);
+       busy = vnode_isinuse(vp, 1);
  
-               if (!was_nocache)
-                       CLR(vp->v_flag, VNOCACHE_DATA);
-               
-               /*
-                * If the VOP_TRUNCATE didn't happen to flush the vnode's
-                * information out to disk, force it to be updated now that
-                * all invalid ranges have been zero-filled and validated:
-                */
-               if (cp->c_flag & C_MODIFIED) {
-                       tv = time;
-                       VOP_UPDATE(vp, &tv, &tv, 0);
-               }
-               VOP_UNLOCK(vp, 0, p);
+       if (busy) {
+               hfs_touchtimes(VTOHFS(vp), cp); 
+       }
+       if (vnode_isdir(vp)) {
+               hfs_reldirhints(cp, busy);
+       } else if (vnode_issystem(vp) && !busy) {
+               vnode_recycle(vp);
+       }
+       if (tooktrunclock) {
+               hfs_unlock_truncate(cp, TRUE);
         }
+       
+       hfs_unlock(cp);
         return (0);
  }
  
  /*
-#% access      vp      L L L
-#
- vop_access {
-     IN struct vnode *vp;
-     IN int mode;
-     IN struct ucred *cred;
-     IN struct proc *p;
-
-     */
-
+ * Get basic attributes.
+ */
  static int
-hfs_access(ap)
-       struct vop_access_args /* {
-               struct vnode *a_vp;
-               int a_mode;
-               struct ucred *a_cred;
-               struct proc *a_p;
-       } */ *ap;
+hfs_vnop_getattr(struct vnop_getattr_args *ap)
  {
+#define VNODE_ATTR_TIMES  \
+       (VNODE_ATTR_va_access_time|VNODE_ATTR_va_change_time|VNODE_ATTR_va_modify_time)
+#define VNODE_ATTR_AUTH  \
+       (VNODE_ATTR_va_mode | VNODE_ATTR_va_uid | VNODE_ATTR_va_gid | \
+         VNODE_ATTR_va_flags | VNODE_ATTR_va_acl)
+
         struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
-       struct ucred *cred = ap->a_cred;
-       register gid_t *gp;
-       mode_t mode = ap->a_mode;
-       mode_t mask = 0;
-       int i;
-       int error;
+       struct vnode_attr *vap = ap->a_vap;
+       struct vnode *rvp = NULLVP;
+       struct hfsmount *hfsmp;
+       struct cnode *cp;
+       uint64_t data_size;
+       enum vtype v_type;
+       int error = 0;
+
+       cp = VTOC(vp);
  
         /*
-        * Disallow write attempts on read-only file systems;
-        * unless the file is a socket, fifo, or a block or
-        * character device resident on the file system.
+        * Shortcut for vnode_authorize path.  Each of the attributes
+        * in this set is updated atomically so we don't need to take
+        * the cnode lock to access them.
          */
-       if (mode & VWRITE) {
-               switch (vp->v_type) {
-               case VDIR:
-               case VLNK:
-               case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                               return (EROFS);
-#if QUOTA
-                       if ((error = hfs_getinoquota(cp)))
-                               return (error);
-#endif /* QUOTA */
-                       break;
+       if ((vap->va_active & ~VNODE_ATTR_AUTH) == 0) {
+               /* Make sure file still exists. */
+               if (cp->c_flag & C_NOEXISTS)
+                       return (ENOENT);
+
+               vap->va_uid = cp->c_uid;
+               vap->va_gid = cp->c_gid;
+               vap->va_mode = cp->c_mode;
+               vap->va_flags = cp->c_flags;
+               vap->va_supported |= VNODE_ATTR_AUTH & ~VNODE_ATTR_va_acl;
+
+               if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) {
+                       vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE;
+                       VATTR_SET_SUPPORTED(vap, va_acl);
                 }
-       }
-
-       /* If immutable bit set, nobody gets to write it. */
-       if ((mode & VWRITE) && (cp->c_flags & IMMUTABLE))
-               return (EPERM);
-
-       /* Otherwise, user id 0 always gets access. */
-       if (ap->a_cred->cr_uid == 0)
                 return (0);
-
-       mask = 0;
-
-       /* Otherwise, check the owner. */
-       if (hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, ap->a_p, false) == 0) {
-               if (mode & VEXEC)
-                       mask |= S_IXUSR;
-               if (mode & VREAD)
-                       mask |= S_IRUSR;
-               if (mode & VWRITE)
-                       mask |= S_IWUSR;
-               return ((cp->c_mode & mask) == mask ? 0 : EACCES);
         }
+       hfsmp = VTOHFS(vp);
+       v_type = vnode_vtype(vp);
  
-       /* Otherwise, check the groups. */
-       if (! (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS)) {
-               for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
-                       if (cp->c_gid == *gp) {
-                               if (mode & VEXEC)
-                                       mask |= S_IXGRP;
-                               if (mode & VREAD)
-                                       mask |= S_IRGRP;
-                               if (mode & VWRITE)
-                                       mask |= S_IWGRP;
-                               return ((cp->c_mode & mask) == mask ? 0 : EACCES);
-                       }
+       /*
+        * If time attributes are requested and we have cnode times
+        * that require updating, then acquire an exclusive lock on
+        * the cnode before updating the times.  Otherwise we can
+        * just acquire a shared lock.
+        */
+       if ((vap->va_active & VNODE_ATTR_TIMES) &&
+           (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime)) {
+               if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)))
+                       return (error);
+               hfs_touchtimes(hfsmp, cp);
+       } else {
+               if ((error = hfs_lock(cp, HFS_SHARED_LOCK)))
+                       return (error);
         }
  
-       /* Otherwise, check everyone else. */
-       if (mode & VEXEC)
-               mask |= S_IXOTH;
-       if (mode & VREAD)
-               mask |= S_IROTH;
-       if (mode & VWRITE)
-               mask |= S_IWOTH;
-       return ((cp->c_mode & mask) == mask ? 0 : EACCES);
-}
-
-
+       if (v_type == VDIR) {
+               data_size = (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE;
  
-/*
-#% getattr     vp      = = =
-#
- vop_getattr {
-     IN struct vnode *vp;
-     IN struct vattr *vap;
-     IN struct ucred *cred;
-     IN struct proc *p;
+               if (VATTR_IS_ACTIVE(vap, va_nlink)) {
+                       int nlink;
+       
+                       /*
+                        * For directories, the va_nlink is essentially a count
+                        * of the ".." references to a directory plus the "."
+                        * reference and the directory itself. So for HFS+ this
+                        * becomes the sub-directory count plus two.
+                        *
+                        * In the absence of a sub-directory count we use the
+                        * directory's item count.  This will be too high in
+                        * most cases since it also includes files.
+                        */
+                       if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && 
+                           (cp->c_attr.ca_recflags & kHFSHasFolderCountMask))
+                               nlink = cp->c_attr.ca_dircount;  /* implied ".." entries */
+                       else
+                               nlink = cp->c_entries;
+
+                       /* Account for ourself and our "." entry */
+                       nlink += 2;  
+                        /* Hide our private directories. */
+                       if (cp->c_cnid == kHFSRootFolderID) {
+                               if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) {
+                                       --nlink;    
+                               }
+                               if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) {
+                                       --nlink;
+                               }
+                       }
+                       VATTR_RETURN(vap, va_nlink, (u_int64_t)nlink);
+               }               
+               if (VATTR_IS_ACTIVE(vap, va_nchildren)) {
+                       int entries;
+       
+                       entries = cp->c_entries;
+                       /* Hide our private files and directories. */
+                       if (cp->c_cnid == kHFSRootFolderID) {
+                               if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0)
+                                       --entries;
+                               if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0)
+                                       --entries;
+                               if (hfsmp->jnl || ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY)))
+                                       entries -= 2;   /* hide the journal files */
+                       }
+                       VATTR_RETURN(vap, va_nchildren, entries);
+               }
+               /*
+                * The va_dirlinkcount is the count of real directory hard links.
+                * (i.e. it's not the sum of the implied "." and ".." references)
+                */
+               if (VATTR_IS_ACTIVE(vap, va_dirlinkcount)) {
+                       VATTR_RETURN(vap, va_dirlinkcount, (uint32_t)cp->c_linkcount);
+               }
+       } else /* !VDIR */ {
+               data_size = VCTOF(vp, cp)->ff_size;
  
-     */
+               VATTR_RETURN(vap, va_nlink, (u_int64_t)cp->c_linkcount);
+               if (VATTR_IS_ACTIVE(vap, va_data_alloc)) {
+                       u_int64_t blocks;
+       
+                       blocks = VCTOF(vp, cp)->ff_blocks;
+                       VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize);
+               }
+       }
  
+       /* conditional because 64-bit arithmetic can be expensive */
+       if (VATTR_IS_ACTIVE(vap, va_total_size)) {
+               if (v_type == VDIR) {
+                       VATTR_RETURN(vap, va_total_size, (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE);
+               } else {
+                       u_int64_t total_size = 0;
+                       struct cnode *rcp;
+                       
+                       if (cp->c_datafork) {
+                               total_size = cp->c_datafork->ff_size;
+                       }
  
-/* ARGSUSED */
-static int
-hfs_getattr(ap)
-       struct vop_getattr_args /* {
-               struct vnode *a_vp;
-               struct vattr *a_vap;
-               struct ucred *a_cred;
-               struct proc *a_p;
-       } */ *ap;
-{
-       struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
-       struct vattr *vap = ap->a_vap;
-       struct timeval tv;
+                       if (cp->c_blocks - VTOF(vp)->ff_blocks) {
+                               /* We deal with resource fork vnode iocount at the end of the function */
+                               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
+                               if (error) {
+                                       goto out;
+                               }
+                               rcp = VTOC(rvp);
+                               if (rcp && rcp->c_rsrcfork) {
+                                       total_size += rcp->c_rsrcfork->ff_size;
+                               }
+                       }
  
-       tv = time;
-       CTIMES(cp, &tv, &tv);
+                       VATTR_RETURN(vap, va_total_size, total_size);
+               }
+       }
+       if (VATTR_IS_ACTIVE(vap, va_total_alloc)) {
+               if (v_type == VDIR) {
+                       VATTR_RETURN(vap, va_total_alloc, 0);
+               } else {
+                       VATTR_RETURN(vap, va_total_alloc, (u_int64_t)cp->c_blocks * (u_int64_t)hfsmp->blockSize);
+               }
+       }
  
-       vap->va_type = vp->v_type;
         /*
-        * [2856576]  Since we are dynamically changing the owner, also
-        * effectively turn off the set-user-id and set-group-id bits,
-        * just like chmod(2) would when changing ownership.  This prevents
-        * a security hole where set-user-id programs run as whoever is
-        * logged on (or root if nobody is logged in yet!)
+        * If the VFS wants extended security data, and we know that we
+        * don't have any (because it never told us it was setting any)
+        * then we can return the supported bit and no data.  If we do
+        * have extended security, we can just leave the bit alone and
+        * the VFS will use the fallback path to fetch it.
          */
-       vap->va_mode = (cp->c_uid == UNKNOWNUID) ? cp->c_mode & ~(S_ISUID | S_ISGID) : cp->c_mode;
-       vap->va_nlink = cp->c_nlink;
-       vap->va_uid = (cp->c_uid == UNKNOWNUID) ? console_user : cp->c_uid;
+       if (VATTR_IS_ACTIVE(vap, va_acl)) {
+               if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) {
+                       vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE;
+                       VATTR_SET_SUPPORTED(vap, va_acl);
+               }
+       }
+       if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+               /* Access times are lazily updated, get current time if needed */
+               if (cp->c_touch_acctime) {
+                       struct timeval tv;
+       
+                       microtime(&tv);
+                       vap->va_access_time.tv_sec = tv.tv_sec;
+               } else {
+                       vap->va_access_time.tv_sec = cp->c_atime;
+               }
+               vap->va_access_time.tv_nsec = 0;
+               VATTR_SET_SUPPORTED(vap, va_access_time);
+       }
+       vap->va_create_time.tv_sec = cp->c_itime;
+       vap->va_create_time.tv_nsec = 0;
+       vap->va_modify_time.tv_sec = cp->c_mtime;
+       vap->va_modify_time.tv_nsec = 0;
+       vap->va_change_time.tv_sec = cp->c_ctime;
+       vap->va_change_time.tv_nsec = 0;
+       vap->va_backup_time.tv_sec = cp->c_btime;
+       vap->va_backup_time.tv_nsec = 0;        
+
+       /* XXX is this really a good 'optimal I/O size'? */
+       vap->va_iosize = hfsmp->hfs_logBlockSize;
+       vap->va_uid = cp->c_uid;
         vap->va_gid = cp->c_gid;
-       vap->va_fsid = cp->c_dev;
+       vap->va_mode = cp->c_mode;
+       vap->va_flags = cp->c_flags;
+
         /*
          * Exporting file IDs from HFS Plus:
          *
@@ -481,153 +588,296 @@ hfs_getattr(ap)
          * c_cnid belongs to the active directory entry (ie the link)
          * and the c_fileid is for the actual inode (ie the data file).
          *
-        * The stat call (getattr) will always return the c_fileid
-        * and Carbon APIs, which are hardlink-ignorant, will always
-        * receive the c_cnid (from getattrlist).
-        */
-       vap->va_fileid = cp->c_fileid;
-       vap->va_atime.tv_sec = cp->c_atime;
-       vap->va_atime.tv_nsec = 0;
-       vap->va_mtime.tv_sec = cp->c_mtime;
-       vap->va_mtime.tv_nsec = cp->c_mtime_nsec;
-       vap->va_ctime.tv_sec = cp->c_ctime;
-       vap->va_ctime.tv_nsec = 0;
-       vap->va_gen = 0;
-       vap->va_flags = cp->c_flags;
-       vap->va_rdev = 0;
-       vap->va_blocksize = VTOVFS(vp)->mnt_stat.f_iosize;
-       vap->va_filerev = 0;
-       vap->va_spare = 0;
-       if (vp->v_type == VDIR) {
-               vap->va_size = cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE;
-               vap->va_bytes = 0;
+        * The stat call (getattr) uses va_fileid and the Carbon APIs,
+        * which are hardlink-ignorant, will ask for va_linkid.
+        */
+       vap->va_fileid = (u_int64_t)cp->c_fileid;
+       /* 
+        * We need to use the origin cache for both hardlinked files 
+        * and directories. Hardlinked directories have multiple cnids 
+        * and parents (one per link). Hardlinked files also have their 
+        * own parents and link IDs separate from the indirect inode number. 
+        * If we don't use the cache, we could end up vending the wrong ID 
+        * because the cnode will only reflect the link that was looked up most recently.
+        */
+       if (cp->c_flag & C_HARDLINK) {
+               vap->va_linkid = (u_int64_t)hfs_currentcnid(cp);
+               vap->va_parentid = (u_int64_t)hfs_currentparent(cp);
         } else {
-               vap->va_size = VTOF(vp)->ff_size;
-               vap->va_bytes = (u_quad_t)cp->c_blocks *
-                                   (u_quad_t)VTOVCB(vp)->blockSize;
-               if (vp->v_type == VBLK || vp->v_type == VCHR)
-                       vap->va_rdev = cp->c_rdev;
+               vap->va_linkid = (u_int64_t)cp->c_cnid;
+               vap->va_parentid = (u_int64_t)cp->c_parentcnid;
+       }
+       vap->va_fsid = cp->c_dev;
+       vap->va_filerev = 0;
+       vap->va_encoding = cp->c_encoding;
+       vap->va_rdev = (v_type == VBLK || v_type == VCHR) ? cp->c_rdev : 0;
+       vap->va_data_size = data_size;
+
+       /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */
+       vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time |
+                            VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time |
+                            VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid |
+                            VNODE_ATTR_va_gid | VNODE_ATTR_va_mode |
+                            VNODE_ATTR_va_flags |VNODE_ATTR_va_fileid |
+                            VNODE_ATTR_va_linkid | VNODE_ATTR_va_parentid |
+                            VNODE_ATTR_va_fsid | VNODE_ATTR_va_filerev |
+                            VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev |
+                            VNODE_ATTR_va_data_size;
+
+       /* If this is the root, let VFS find out the mount name, which may be different from the real name.
+        * Otherwise, we just need to take care of hardlinked files, which need to be looked up, if necessary.
+        */
+       if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) {
+               struct cat_desc linkdesc;
+               int lockflags;
+               int uselinkdesc = 0;
+               cnid_t nextlinkid = 0;
+               cnid_t prevlinkid = 0;  
+
+               /* Get the name for ATTR_CMN_NAME.  We need to take special care for hardlinks      
+                * here because the info. for the link ID requested by getattrlist may be
+                * different than what's currently in the cnode.  This is because the cnode     
+                * will be filled in with the information for the most recent link ID that went
+                * through namei/lookup().  If there are competing lookups for hardlinks that point 
+                * to the same inode, one (or more) getattrlists could be vended incorrect name information.
+                * Also, we need to beware of open-unlinked files which could have a namelen of 0.  Note
+                * that if another hardlink sibling of this file is being unlinked, that could also thrash
+                * the name fields but it should *not* be treated like an open-unlinked file here.
+                */
+               if ((cp->c_flag & C_HARDLINK) &&
+                               ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) {
+                       /* If we have no name and our linkID is the raw inode number, then we may
+                        * have an open-unlinked file.  Go to the next link in this case. 
+                        */
+                       if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) {
+                               if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))) {
+                                       goto out;
+                               }
+                       }
+                       else {
+                               nextlinkid = vap->va_linkid;
+                       }
+                       /* Now probe the catalog for the linkID.  Note that we don't know if we have
+                        * the exclusive lock here for the cnode, so we can't just update the descriptor.  
+                        * Instead, we should just store the descriptor's value locally and then use it to pass
+                        * out the name value as needed below.
+                        */
+                       if (nextlinkid) {
+                               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                               error = cat_findname(hfsmp, nextlinkid, &linkdesc);     
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               if (error == 0) {
+                                       uselinkdesc = 1;
+                               }
+                       }
+               }
+               
+               /* By this point, we either patched the name above, and the c_desc points 
+                * to correct data, or it already did, in which case we just proceed by copying
+                * the name into the VAP.  Note that we will never set va_name to supported if
+                * nextlinkid is never initialized.  This could happen in the degenerate case above
+                * involving the raw inode number, where it has no nextlinkid.  In this case, we will
+                * simply not export the name as supported.
+                */
+               if (uselinkdesc) {
+                       strlcpy(vap->va_name, (const char *)linkdesc.cd_nameptr, MAXPATHLEN);
+                       VATTR_SET_SUPPORTED(vap, va_name);
+                       cat_releasedesc(&linkdesc);     
+               }
+               else if (cp->c_desc.cd_namelen) {
+                       strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN);
+                       VATTR_SET_SUPPORTED(vap, va_name);
+               }
         }
-       return (0);
-}
-
-/*
- * Set attribute vnode op. called from several syscalls
-#% setattr     vp      L L L
-#
- vop_setattr {
-     IN struct vnode *vp;
-     IN struct vattr *vap;
-     IN struct ucred *cred;
-     IN struct proc *p;
  
-     */
+out:
+       hfs_unlock(cp);
+       /* 
+        * We need to drop the iocount on the rsrc fork vnode only *after* we've 
+        * released the cnode lock, since vnode_put can trigger an inactive call, which
+        * will go back into the HFS and try to acquire a cnode lock.    
+        */
+       if (rvp) {
+               vnode_put(rvp);
+       }
+       return (error);
+}
  
  static int
-hfs_setattr(ap)
-       struct vop_setattr_args /* {
+hfs_vnop_setattr(ap)
+       struct vnop_setattr_args /* {
                 struct vnode *a_vp;
-               struct vattr *a_vap;
-               struct ucred *a_cred;
-               struct proc *a_p;
+               struct vnode_attr *a_vap;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       struct vattr *vap = ap->a_vap;
+       struct vnode_attr *vap = ap->a_vap;
         struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
-       struct ucred *cred = ap->a_cred;
-       struct proc *p = ap->a_p;
-       struct timeval atimeval, mtimeval;
-       int error;
+       struct cnode *cp = NULL;
+       struct hfsmount *hfsmp;
+       kauth_cred_t cred = vfs_context_ucred(ap->a_context);
+       struct proc *p = vfs_context_proc(ap->a_context);
+       int error = 0;
+       uid_t nuid;
+       gid_t ngid;
+
+       hfsmp = VTOHFS(vp);
+
+       /* Don't allow modification of the journal file. */
+       if (hfsmp->hfs_jnlfileid == VTOC(vp)->c_fileid) {
+               return (EPERM);
+       }
  
         /*
-        * Check for unsettable attributes.
+        * File size change request.
+        * We are guaranteed that this is not a directory, and that
+        * the filesystem object is writeable.
          */
-       if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
-           (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
-           (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
-           ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
-               return (EINVAL);
-       }
+       VATTR_SET_SUPPORTED(vap, va_data_size);
+       if (VATTR_IS_ACTIVE(vap, va_data_size) && !vnode_islnk(vp)) {
  
-       if (vap->va_flags != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                       return (EROFS);
-               if ((error = hfs_chflags(vp, vap->va_flags, cred, p)))
+               /* Take truncate lock before taking cnode lock. */
+               hfs_lock_truncate(VTOC(vp), TRUE);
+               
+               /* Perform the ubc_setsize before taking the cnode lock. */
+               ubc_setsize(vp, vap->va_data_size);
+
+               if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
+                       hfs_unlock_truncate(VTOC(vp), TRUE);
                         return (error);
-               if (vap->va_flags & (IMMUTABLE | APPEND))
-                       return (0);
+               }
+               cp = VTOC(vp);
+
+               error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, ap->a_context);
+
+               hfs_unlock_truncate(cp, TRUE);
+               if (error)
+                       goto out;
+       }
+       if (cp == NULL) {
+               if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
+                       return (error);
+               cp = VTOC(vp);
         }
  
-       if (cp->c_flags & (IMMUTABLE | APPEND))
-               return (EPERM);
         /*
-        * Go through the fields and update iff not VNOVAL.
+        * If it is just an access time update request by itself
+        * we know the request is from kernel level code, and we
+        * can delay it without being as worried about consistency.
+        * This change speeds up mmaps, in the rare case that they
+        * get caught behind a sync.
          */
-       if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                       return (EROFS);
-               if ((error = hfs_chown(vp, vap->va_uid, vap->va_gid, cred, p)))
-                       return (error);
+
+       if (vap->va_active == VNODE_ATTR_va_access_time) {
+               cp->c_touch_acctime=TRUE;
+               goto out;
         }
-       if (vap->va_size != VNOVAL) {
+
+
+
+       /*
+        * Owner/group change request.
+        * We are guaranteed that the new owner/group is valid and legal.
+        */
+       VATTR_SET_SUPPORTED(vap, va_uid);
+       VATTR_SET_SUPPORTED(vap, va_gid);
+       nuid = VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uid_t)VNOVAL;
+       ngid = VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (gid_t)VNOVAL;
+       if (((nuid != (uid_t)VNOVAL) || (ngid != (gid_t)VNOVAL)) &&
+           ((error = hfs_chown(vp, nuid, ngid, cred, p)) != 0))
+               goto out;
+
+       /*
+        * Mode change request.
+        * We are guaranteed that the mode value is valid and that in
+        * conjunction with the owner and group, this change is legal.
+        */
+       VATTR_SET_SUPPORTED(vap, va_mode);
+       if (VATTR_IS_ACTIVE(vap, va_mode) &&
+           ((error = hfs_chmod(vp, (int)vap->va_mode, cred, p)) != 0))
+           goto out;
+
+       /*
+        * File flags change.
+        * We are guaranteed that only flags allowed to change given the
+        * current securelevel are being changed.
+        */
+       VATTR_SET_SUPPORTED(vap, va_flags);
+       if (VATTR_IS_ACTIVE(vap, va_flags)) {
+               u_int16_t *fdFlags;
+
+               cp->c_flags = vap->va_flags;
+               cp->c_touch_chgtime = TRUE;
+               
                 /*
-                * Disallow write attempts on read-only file systems;
-                * unless the file is a socket, fifo, or a block or
-                * character device resident on the file system.
+                * Mirror the UF_HIDDEN flag to the invisible bit of the Finder Info.
+                *
+                * The fdFlags for files and frFlags for folders are both 8 bytes
+                * into the userInfo (the first 16 bytes of the Finder Info).  They
+                * are both 16-bit fields.
                  */
-               switch (vp->v_type) {
-               case VDIR:
-                       return (EISDIR);
-               case VLNK:
-               case VREG:
-                       if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                               return (EROFS);
-                       break;
-               default:
-                       break;
-               }
-               if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)))
-                       return (error);
+               fdFlags = (u_int16_t *) &cp->c_finderinfo[8];
+               if (vap->va_flags & UF_HIDDEN)
+                       *fdFlags |= OSSwapHostToBigConstInt16(kFinderInvisibleMask);
+               else
+                       *fdFlags &= ~OSSwapHostToBigConstInt16(kFinderInvisibleMask);
         }
-       cp = VTOC(vp);
-       if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                       return (EROFS);
-               if (((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) &&
-                   ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
-                   (error = VOP_ACCESS(vp, VWRITE, cred, p)))) {
-                       return (error);
+
+       /*
+        * Timestamp updates.
+        */
+       VATTR_SET_SUPPORTED(vap, va_create_time);
+       VATTR_SET_SUPPORTED(vap, va_access_time);
+       VATTR_SET_SUPPORTED(vap, va_modify_time);
+       VATTR_SET_SUPPORTED(vap, va_backup_time);
+       VATTR_SET_SUPPORTED(vap, va_change_time);
+       if (VATTR_IS_ACTIVE(vap, va_create_time) ||
+           VATTR_IS_ACTIVE(vap, va_access_time) ||
+           VATTR_IS_ACTIVE(vap, va_modify_time) ||
+           VATTR_IS_ACTIVE(vap, va_backup_time)) {
+               if (VATTR_IS_ACTIVE(vap, va_create_time))
+                       cp->c_itime = vap->va_create_time.tv_sec;
+               if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+                       cp->c_atime = vap->va_access_time.tv_sec;
+                       cp->c_touch_acctime = FALSE;
                 }
-               if (vap->va_atime.tv_sec != VNOVAL)
-                       cp->c_flag |= C_ACCESS;
-               if (vap->va_mtime.tv_sec != VNOVAL) {
-                       cp->c_flag |= C_CHANGE | C_UPDATE;
+               if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
+                       cp->c_mtime = vap->va_modify_time.tv_sec;
+                       cp->c_touch_modtime = FALSE;
+                       cp->c_touch_chgtime = TRUE;
+
                         /*
                          * The utimes system call can reset the modification
                          * time but it doesn't know about HFS create times.
-                        * So we need to insure that the creation time is
+                        * So we need to ensure that the creation time is
                          * always at least as old as the modification time.
                          */
                         if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) &&
-                           (cp->c_cnid != kRootDirID) &&
-                           (vap->va_mtime.tv_sec < cp->c_itime)) {
-                               cp->c_itime = vap->va_mtime.tv_sec;
+                           (cp->c_cnid != kHFSRootFolderID) &&
+                           (cp->c_mtime < cp->c_itime)) {
+                               cp->c_itime = cp->c_mtime;
                         }
                 }
-               atimeval.tv_sec = vap->va_atime.tv_sec;
-               atimeval.tv_usec = 0;
-               mtimeval.tv_sec = vap->va_mtime.tv_sec;
-               mtimeval.tv_usec = 0;
-               if ((error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1)))
-                       return (error);
+               if (VATTR_IS_ACTIVE(vap, va_backup_time))
+                       cp->c_btime = vap->va_backup_time.tv_sec;
+               cp->c_flag |= C_MODIFIED;
         }
-       error = 0;
-       if (vap->va_mode != (mode_t)VNOVAL) {
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
-                       return (EROFS);
-               error = hfs_chmod(vp, (int)vap->va_mode, cred, p);
+       
+       /*
+        * Set name encoding.
+        */
+       VATTR_SET_SUPPORTED(vap, va_encoding);
+       if (VATTR_IS_ACTIVE(vap, va_encoding)) {
+               cp->c_encoding = vap->va_encoding;
+               hfs_setencodingbits(hfsmp, cp->c_encoding);
         }
+
+       if ((error = hfs_update(vp, TRUE)) != 0)
+               goto out;
+       HFS_KNOTE(vp, NOTE_ATTRIB);
+out:
+       if (cp)
+               hfs_unlock(cp);
         return (error);
  }
  
@@ -636,59 +886,57 @@ hfs_setattr(ap)
   * Change the mode on a file.
   * cnode must be locked before calling.
   */
+__private_extern__
  int
-hfs_chmod(vp, mode, cred, p)
-       register struct vnode *vp;
-       register int mode;
-       register struct ucred *cred;
-       struct proc *p;
+hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struct proc *p)
  {
         register struct cnode *cp = VTOC(vp);
-       int error;
  
         if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord)
                 return (0);
  
+       // XXXdbg - don't allow modification of the journal or journal_info_block
+       if (VTOHFS(vp)->jnl && cp && cp->c_datafork) {
+               struct HFSPlusExtentDescriptor *extd;
+
+               extd = &cp->c_datafork->ff_extents[0];
+               if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
+                       return EPERM;
+               }
+       }
+
  #if OVERRIDE_UNKNOWN_PERMISSIONS
-       if (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) {
+       if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS) {
                 return (0);
         };
  #endif
-       if ((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0)
-               return (error);
-       if (cred->cr_uid) {
-               if (vp->v_type != VDIR && (mode & S_ISTXT))
-                       return (EFTYPE);
-               if (!groupmember(cp->c_gid, cred) && (mode & S_ISGID))
-                       return (EPERM);
-       }
         cp->c_mode &= ~ALLPERMS;
         cp->c_mode |= (mode & ALLPERMS);
-       cp->c_flag |= C_CHANGE;
+       cp->c_touch_chgtime = TRUE;
         return (0);
  }
  
  
+__private_extern__
  int
-hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags)
+hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags)
  {
         struct cnode *cp = VTOC(vp);
-       gid_t *gp;
         int retval = 0;
-       int i;
+       int is_member;
  
         /*
          * Disallow write attempts on read-only file systems;
          * unless the file is a socket, fifo, or a block or
          * character device resident on the file system.
          */
-       switch (vp->v_type) {
+       switch (vnode_vtype(vp)) {
         case VDIR:
         case VLNK:
         case VREG:
-               if (VTOVFS(vp)->mnt_flag & MNT_RDONLY)
+               if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY)
                         return (EROFS);
-        break;
+               break;
         default:
                 break;
         }
@@ -698,7 +946,7 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
                 return (EPERM);
  
         /* Otherwise, user id 0 always gets access. */
-       if (cred->cr_uid == 0)
+       if (!suser(cred, NULL))
                 return (0);
  
         /* Otherwise, check the owner. */
@@ -706,9 +954,8 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
                 return ((cp->c_mode & S_IWUSR) == S_IWUSR ? 0 : EACCES);
   
         /* Otherwise, check the groups. */
-       for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) {
-               if (cp->c_gid == *gp)
-                       return ((cp->c_mode & S_IWGRP) == S_IWGRP ? 0 : EACCES);
+       if (kauth_cred_ismember_gid(cred, cp->c_gid, &is_member) == 0 && is_member) {
+               return ((cp->c_mode & S_IWGRP) == S_IWGRP ? 0 : EACCES);
         }
   
         /* Otherwise, check everyone else. */
@@ -716,89 +963,52 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c
  }
  
  
-
-/*
- * Change the flags on a file or directory.
- * cnode must be locked before calling.
- */
-int
-hfs_chflags(vp, flags, cred, p)
-       register struct vnode *vp;
-       register u_long flags;
-       register struct ucred *cred;
-       struct proc *p;
-{
-       register struct cnode *cp = VTOC(vp);
-       int retval;
-
-       if (VTOVCB(vp)->vcbSigWord == kHFSSigWord) {
-               if ((retval = hfs_write_access(vp, cred, p, false)) != 0) {
-                       return retval;
-               };
-       } else if ((retval = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) {
-               return retval;
-       };
-
-       if (cred->cr_uid == 0) {
-               if ((cp->c_flags & (SF_IMMUTABLE | SF_APPEND)) &&
-                       securelevel > 0) {
-                       return EPERM;
-               };
-               cp->c_flags = flags;
-       } else {
-               if (cp->c_flags & (SF_IMMUTABLE | SF_APPEND) ||
-                       (flags & UF_SETTABLE) != flags) {
-                       return EPERM;
-               };
-               cp->c_flags &= SF_SETTABLE;
-               cp->c_flags |= (flags & UF_SETTABLE);
-       }
-       cp->c_flag |= C_CHANGE;
-
-       return (0);
-}
-
-
  /*
   * Perform chown operation on cnode cp;
   * code must be locked prior to call.
   */
+__private_extern__
  int
-hfs_chown(vp, uid, gid, cred, p)
-       register struct vnode *vp;
-       uid_t uid;
-       gid_t gid;
-       struct ucred *cred;
-       struct proc *p;
+#if !QUOTA
+hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, __unused kauth_cred_t cred,
+       __unused struct proc *p)
+#else 
+hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
+       __unused struct proc *p)
+#endif
  {
         register struct cnode *cp = VTOC(vp);
         uid_t ouid;
         gid_t ogid;
-       int error = 0;
  #if QUOTA
+       int error = 0;
         register int i;
         int64_t change;
  #endif /* QUOTA */
  
         if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord)
-               return (EOPNOTSUPP);
+               return (ENOTSUP);
  
-       if (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS)
+       if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS)
                 return (0);
         
         if (uid == (uid_t)VNOVAL)
                 uid = cp->c_uid;
         if (gid == (gid_t)VNOVAL)
                 gid = cp->c_gid;
+
+#if 0  /* we are guaranteed that this is already the case */
         /*
          * If we don't own the file, are trying to change the owner
          * of the file, or are not a member of the target group,
          * the caller must be superuser or the call fails.
          */
-       if ((cred->cr_uid != cp->c_uid || uid != cp->c_uid ||
-           (gid != cp->c_gid && !groupmember((gid_t)gid, cred))) &&
-           (error = suser(cred, &p->p_acflag)))
+       if ((kauth_cred_getuid(cred) != cp->c_uid || uid != cp->c_uid ||
+           (gid != cp->c_gid &&
+            (kauth_cred_ismember_gid(cred, gid, &is_member) || !is_member))) &&
+           (error = suser(cred, 0)))
                 return (error);
+#endif
  
         ogid = cp->c_gid;
         ouid = cp->c_uid;
@@ -806,26 +1016,26 @@ hfs_chown(vp, uid, gid, cred, p)
         if ((error = hfs_getinoquota(cp)))
                 return (error);
         if (ouid == uid) {
-               dqrele(vp, cp->c_dquot[USRQUOTA]);
+               dqrele(cp->c_dquot[USRQUOTA]);
                 cp->c_dquot[USRQUOTA] = NODQUOT;
         }
         if (ogid == gid) {
-               dqrele(vp, cp->c_dquot[GRPQUOTA]);
+               dqrele(cp->c_dquot[GRPQUOTA]);
                 cp->c_dquot[GRPQUOTA] = NODQUOT;
         }
  
         /*
          * Eventually need to account for (fake) a block per directory
-        *if (vp->v_type == VDIR)
-        *change = VTOVCB(vp)->blockSize;
-        *else
+        * if (vnode_isdir(vp))
+        *     change = VTOHFS(vp)->blockSize;
+        * else
          */
  
         change = (int64_t)(cp->c_blocks) * (int64_t)VTOVCB(vp)->blockSize;
         (void) hfs_chkdq(cp, -change, cred, CHOWN);
         (void) hfs_chkiq(cp, -1, cred, CHOWN);
         for (i = 0; i < MAXQUOTAS; i++) {
-               dqrele(vp, cp->c_dquot[i]);
+               dqrele(cp->c_dquot[i]);
                 cp->c_dquot[i] = NODQUOT;
         }
  #endif /* QUOTA */
@@ -834,11 +1044,11 @@ hfs_chown(vp, uid, gid, cred, p)
  #if QUOTA
         if ((error = hfs_getinoquota(cp)) == 0) {
                 if (ouid == uid) {
-                       dqrele(vp, cp->c_dquot[USRQUOTA]);
+                       dqrele(cp->c_dquot[USRQUOTA]);
                         cp->c_dquot[USRQUOTA] = NODQUOT;
                 }
                 if (ogid == gid) {
-                       dqrele(vp, cp->c_dquot[GRPQUOTA]);
+                       dqrele(cp->c_dquot[GRPQUOTA]);
                         cp->c_dquot[GRPQUOTA] = NODQUOT;
                 }
                 if ((error = hfs_chkdq(cp, change, cred, CHOWN)) == 0) {
@@ -848,7 +1058,7 @@ hfs_chown(vp, uid, gid, cred, p)
                                 (void) hfs_chkdq(cp, -change, cred, CHOWN|FORCE);
                 }
                 for (i = 0; i < MAXQUOTAS; i++) {
-                       dqrele(vp, cp->c_dquot[i]);
+                       dqrele(cp->c_dquot[i]);
                         cp->c_dquot[i] = NODQUOT;
                 }
         }
@@ -856,11 +1066,11 @@ hfs_chown(vp, uid, gid, cred, p)
         cp->c_uid = ouid;
         if (hfs_getinoquota(cp) == 0) {
                 if (ouid == uid) {
-                       dqrele(vp, cp->c_dquot[USRQUOTA]);
+                       dqrele(cp->c_dquot[USRQUOTA]);
                         cp->c_dquot[USRQUOTA] = NODQUOT;
                 }
                 if (ogid == gid) {
-                       dqrele(vp, cp->c_dquot[GRPQUOTA]);
+                       dqrele(cp->c_dquot[GRPQUOTA]);
                         cp->c_dquot[GRPQUOTA] = NODQUOT;
                 }
                 (void) hfs_chkdq(cp, change, cred, FORCE|CHOWN);
@@ -873,110 +1083,147 @@ good:
                 panic("hfs_chown: lost quota");
  #endif /* QUOTA */
  
-       if (ouid != uid || ogid != gid)
-               cp->c_flag |= C_CHANGE;
-       if (ouid != uid && cred->cr_uid != 0)
-               cp->c_mode &= ~S_ISUID;
-       if (ogid != gid && cred->cr_uid != 0)
-               cp->c_mode &= ~S_ISGID;
+
+       /*
+         According to the SUSv3 Standard, chown() shall mark
+         for update the st_ctime field of the file.
+         (No exceptions mentioned)
+       */
+               cp->c_touch_chgtime = TRUE;
         return (0);
  }
  
  
  /*
-#
-#% exchange fvp                L L L
-#% exchange tvp                L L L
-#
+ * The hfs_exchange routine swaps the fork data in two files by
+ * exchanging some of the information in the cnode.  It is used
+ * to preserve the file ID when updating an existing file, in
+ * case the file is being tracked through its file ID. Typically
+ * it's used after creating a new file during a safe-save.
   */
- /*
-  * The hfs_exchange routine swaps the fork data in two files by
-  * exchanging some of the information in the cnode.  It is used
-  * to preserve the file ID when updating an existing file, in
-  * case the file is being tracked through its file ID. Typically
-  * its used after creating a new file during a safe-save.
-  */
-  
  static int
-hfs_exchange(ap)
-       struct vop_exchange_args /* {
+hfs_vnop_exchange(ap)
+       struct vnop_exchange_args /* {
                 struct vnode *a_fvp;
                 struct vnode *a_tvp;
-               struct ucred *a_cred;
-               struct proc *a_p;
+               int a_options;
+               vfs_context_t a_context;
         } */ *ap;
  {
         struct vnode *from_vp = ap->a_fvp;
         struct vnode *to_vp = ap->a_tvp;
-       struct vnode *from_rvp = NULL;
-       struct vnode *to_rvp = NULL;
-       struct cnode *from_cp = VTOC(from_vp);
-       struct cnode *to_cp = VTOC(to_vp);
-       struct hfsmount *hfsmp = VTOHFS(from_vp);
+       struct cnode *from_cp;
+       struct cnode *to_cp;
+       struct hfsmount *hfsmp;
         struct cat_desc tempdesc;
         struct cat_attr tempattr;
-       int error = 0;
+       const unsigned char *from_nameptr;
+       const unsigned char *to_nameptr;
+       char from_iname[32];
+       char to_iname[32];
+       u_int32_t tempflag;
+       cnid_t  from_parid;
+       cnid_t  to_parid;
+       int lockflags;
+       int error = 0, started_tr = 0, got_cookie = 0;
+       cat_cookie_t cookie;
  
         /* The files must be on the same volume. */
-       if (from_vp->v_mount != to_vp->v_mount)
+       if (vnode_mount(from_vp) != vnode_mount(to_vp))
                 return (EXDEV);
  
-       /* Only normal files can be exchanged. */
-       if ((from_vp->v_type != VREG) || (to_vp->v_type != VREG) ||
-           (from_cp->c_flag & C_HARDLINK) || (to_cp->c_flag & C_HARDLINK) ||
-           VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp))
+       if (from_vp == to_vp)
                 return (EINVAL);
  
-       from_rvp = from_cp->c_rsrc_vp;
-       to_rvp = to_cp->c_rsrc_vp;
+       if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK)))
+               return (error);
  
-       /* If one of the resource forks is open then get the other one. */
-       if (from_rvp || to_rvp) {
-               error = hfs_vgetrsrc(hfsmp, from_vp, &from_rvp, ap->a_p);
-               if (error)
-                       return (error);
-               error = hfs_vgetrsrc(hfsmp, to_vp, &to_rvp, ap->a_p);
-               if (error) {
-                       vrele(from_rvp);
-                       return (error);
-               } 
+       from_cp = VTOC(from_vp);
+       to_cp = VTOC(to_vp);
+       hfsmp = VTOHFS(from_vp);
+
+       /* Only normal files can be exchanged. */
+       if (!vnode_isreg(from_vp) || !vnode_isreg(to_vp) ||
+           VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) {
+               error = EINVAL;
+               goto exit;
         }
  
-       /* Ignore any errors, we are doing a 'best effort' on flushing */
-       if (from_vp)
-               (void) vinvalbuf(from_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_vp)
-               (void) vinvalbuf(to_vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (from_rvp)
-               (void) vinvalbuf(from_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
-       if (to_rvp)
-               (void) vinvalbuf(to_rvp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
+       // XXXdbg - don't allow modification of the journal or journal_info_block
+       if (hfsmp->jnl) {
+               struct HFSPlusExtentDescriptor *extd;
  
-       /* Lock catalog b-tree */
-       error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, ap->a_p);
-       if (error) goto Err_Exit;
+               if (from_cp->c_datafork) {
+                       extd = &from_cp->c_datafork->ff_extents[0];
+                       if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
+                               error = EPERM;
+                               goto exit;
+                       }
+               }
+
+               if (to_cp->c_datafork) {
+                       extd = &to_cp->c_datafork->ff_extents[0];
+                       if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
+                               error = EPERM;
+                               goto exit;
+                       }
+               }
+       }
+
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto exit;
+       }
+       started_tr = 1;
+       
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, vfs_context_proc(ap->a_context)))) {
+               goto exit;
+       }
+       got_cookie = 1;
  
         /* The backend code always tries to delete the virtual
-        * extent id for exchanging files so we neeed to lock
+        * extent id for exchanging files so we need to lock
          * the extents b-tree.
          */
-       error = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-       if (error) {
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, ap->a_p);
-               goto Err_Exit;
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+       /* Account for the location of the catalog objects. */
+       if (from_cp->c_flag & C_HARDLINK) {
+               MAKE_INODE_NAME(from_iname, sizeof(from_iname),
+                               from_cp->c_attr.ca_linkref);
+               from_nameptr = (unsigned char *)from_iname;
+               from_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+               from_cp->c_hint = 0;
+       } else {
+               from_nameptr = from_cp->c_desc.cd_nameptr;
+               from_parid = from_cp->c_parentcnid;
+       }
+       if (to_cp->c_flag & C_HARDLINK) {
+               MAKE_INODE_NAME(to_iname, sizeof(to_iname),
+                               to_cp->c_attr.ca_linkref);
+               to_nameptr = (unsigned char *)to_iname;
+               to_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+               to_cp->c_hint = 0;
+       } else {
+               to_nameptr = to_cp->c_desc.cd_nameptr;
+               to_parid = to_cp->c_parentcnid;
         }
  
         /* Do the exchange */
-       error = MacToVFSError(ExchangeFileIDs(HFSTOVCB(hfsmp),
-                               from_cp->c_desc.cd_nameptr, to_cp->c_desc.cd_nameptr,
-                               from_cp->c_parentcnid, to_cp->c_parentcnid,
-                               from_cp->c_hint, to_cp->c_hint));
+       error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid,
+                               to_parid, from_cp->c_hint, to_cp->c_hint);
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
-       (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, ap->a_p);
+       /*
+        * Note that we don't need to exchange any extended attributes
+        * since the attributes are keyed by file ID.
+        */
  
         if (error != E_NONE) {
-               goto Err_Exit;
+               error = MacToVFSError(error);
+               goto exit;
         }
  
         /* Purge the vnodes from the name cache */
@@ -988,12 +1235,14 @@ hfs_exchange(ap)
         /* Save a copy of from attributes before swapping. */
         bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc));
         bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr));
+       tempflag = from_cp->c_flag & (C_HARDLINK | C_HASXATTRS);
  
         /*
          * Swap the descriptors and all non-fork related attributes.
          * (except the modify date)
          */
         bcopy(&to_cp->c_desc, &from_cp->c_desc, sizeof(struct cat_desc));
+
         from_cp->c_hint = 0;
         from_cp->c_fileid = from_cp->c_cnid;
         from_cp->c_itime = to_cp->c_itime;
@@ -1004,6 +1253,9 @@ hfs_exchange(ap)
         from_cp->c_uid = to_cp->c_uid;
         from_cp->c_flags = to_cp->c_flags;
         from_cp->c_mode = to_cp->c_mode;
+       from_cp->c_linkcount = to_cp->c_linkcount;
+       from_cp->c_flag = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS);
+       from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags;
         bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32);
  
         bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc));
@@ -1017,91 +1269,113 @@ hfs_exchange(ap)
         to_cp->c_uid = tempattr.ca_uid;
         to_cp->c_flags = tempattr.ca_flags;
         to_cp->c_mode = tempattr.ca_mode;
+       to_cp->c_linkcount = tempattr.ca_linkcount;
+       to_cp->c_flag = tempflag;
+       to_cp->c_attr.ca_recflags = tempattr.ca_recflags;
         bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32);
  
-       /* Reinsert into the cnode hash under new file IDs*/
-       hfs_chashremove(from_cp);
-       hfs_chashremove(to_cp);
+       /* Rehash the cnodes using their new file IDs */
+       hfs_chash_rehash(from_cp, to_cp);
+
+       /*
+        * When a file moves out of "Cleanup At Startup"
+        * we can drop its NODUMP status.
+        */
+       if ((from_cp->c_flags & UF_NODUMP) &&
+           (from_cp->c_parentcnid != to_cp->c_parentcnid)) {
+               from_cp->c_flags &= ~UF_NODUMP;
+               from_cp->c_touch_chgtime = TRUE;
+       }
+       if ((to_cp->c_flags & UF_NODUMP) &&
+           (to_cp->c_parentcnid != from_cp->c_parentcnid)) {
+               to_cp->c_flags &= ~UF_NODUMP;
+               to_cp->c_touch_chgtime = TRUE;
+       }
+
+       HFS_KNOTE(from_vp, NOTE_ATTRIB);
+       HFS_KNOTE(to_vp, NOTE_ATTRIB);
  
-       hfs_chashinsert(from_cp);
-       hfs_chashinsert(to_cp);
-Err_Exit:
-       if (to_rvp)
-               vrele(to_rvp);
-       if (from_rvp)
-               vrele(from_rvp);
+exit:
+       if (got_cookie) {
+               cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context));
+       }
+       if (started_tr) {
+           hfs_end_transaction(hfsmp);
+       }
  
+       hfs_unlockpair(from_cp, to_cp);
         return (error);
  }
  
  
  /*
-
-#% fsync       vp      L L L
-#
- vop_fsync {
-     IN struct vnode *vp;
-     IN struct ucred *cred;
-     IN int waitfor;
-     IN struct proc *p;
-
-     */
-
-static int
-hfs_fsync(ap)
-       struct vop_fsync_args /* {
-               struct vnode *a_vp;
-               struct ucred *a_cred;
-               int a_waitfor;
-               struct proc *a_p;
-       } */ *ap;
+ *  cnode must be locked
+ */
+__private_extern__
+int
+hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
  {
-       struct vnode *vp = ap->a_vp;
         struct cnode *cp = VTOC(vp);
         struct filefork *fp = NULL;
         int retval = 0;
-       register struct buf *bp;
+       struct hfsmount *hfsmp = VTOHFS(vp);
         struct timeval tv;
-       struct buf *nbp;
-       int s;
         int wait;
-       int retry = 0;
-
-       wait = (ap->a_waitfor == MNT_WAIT);
+       int lockflag;
+       int took_trunc_lock = 0;
  
+       wait = (waitfor == MNT_WAIT);
+       if (always_do_fullfsync)
+               fullsync = 1;
+       
         /* HFS directories don't have any data blocks. */
-       if (vp->v_type == VDIR)
+       if (vnode_isdir(vp))
                 goto metasync;
  
         /*
          * For system files flush the B-tree header and
          * for regular files write out any clusters
          */
-       if (vp->v_flag & VSYSTEM) {
-               if (VTOF(vp)->fcbBTCBPtr != NULL)
-                       BTFlushPath(VTOF(vp));
-       } else if (UBCINFOEXISTS(vp))
-               (void) cluster_push(vp);
+       if (vnode_issystem(vp)) {
+           if (VTOF(vp)->fcbBTCBPtr != NULL) {
+                       // XXXdbg
+                       if (hfsmp->jnl == NULL) {
+                               BTFlushPath(VTOF(vp));
+                       }
+           }
+       } else if (UBCINFOEXISTS(vp)) {
+               hfs_unlock(cp);
+               hfs_lock_truncate(cp, TRUE);
+               took_trunc_lock = 1;
+
+               /* Don't hold cnode lock when calling into cluster layer. */
+               (void) cluster_push(vp, wait ? IO_SYNC : 0);
  
+               hfs_lock(cp, HFS_FORCE_LOCK);
+       }
         /*
          * When MNT_WAIT is requested and the zero fill timeout
          * has expired then we must explicitly zero out any areas
          * that are currently marked invalid (holes).
+        *
+        * Files with NODUMP can bypass zero filling here.
          */
         if ((wait || (cp->c_flag & C_ZFWANTSYNC)) &&
-           UBCINFOEXISTS(vp) && (fp = VTOF(vp)) &&
+           ((cp->c_flags & UF_NODUMP) == 0) &&
+           UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) && (fp = VTOF(vp)) &&
             cp->c_zftimeout != 0) {
-               int devblksize;
-               int was_nocache;
-
-               if (time.tv_sec < cp->c_zftimeout) {
+               microuptime(&tv);
+               if (!fullsync && tv.tv_sec < (long)cp->c_zftimeout) {
                         /* Remember that a force sync was requested. */
                         cp->c_flag |= C_ZFWANTSYNC;
-                       goto loop;
-               }       
-               VOP_DEVBLOCKSIZE(cp->c_devvp, &devblksize);
-               was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA);
-               SET(vp->v_flag, VNOCACHE_DATA); /* Don't cache zeros */
+                       goto datasync;
+               }
+               if (!took_trunc_lock) {
+                       hfs_unlock(cp);
+                       hfs_lock_truncate(cp, TRUE);
+                       hfs_lock(cp, HFS_FORCE_LOCK);
+                       took_trunc_lock = 1;
+               }
  
                 while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) {
                         struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges);
@@ -1109,224 +1383,292 @@ hfs_fsync(ap)
                         off_t end = invalid_range->rl_end;
                 
                         /* The range about to be written must be validated
-                        * first, so that VOP_CMAP() will return the
+                        * first, so that VNOP_BLOCKMAP() will return the
                          * appropriate mapping for the cluster code:
                          */
                         rl_remove(start, end, &fp->ff_invalidranges);
  
+                       /* Don't hold cnode lock when calling into cluster layer. */
+                       hfs_unlock(cp);
                         (void) cluster_write(vp, (struct uio *) 0,
-                                       fp->ff_size,
-                                       invalid_range->rl_end + 1,
-                                       invalid_range->rl_start,
-                                       (off_t)0, devblksize,
-                                       IO_HEADZEROFILL | IO_NOZERODIRTY);
+                                       fp->ff_size, end + 1, start, (off_t)0,
+                                       IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
+                       hfs_lock(cp, HFS_FORCE_LOCK);
                         cp->c_flag |= C_MODIFIED;
                 }
-               (void) cluster_push(vp);
-               if (!was_nocache)
-                       CLR(vp->v_flag, VNOCACHE_DATA);
+               hfs_unlock(cp);
+               (void) cluster_push(vp, wait ? IO_SYNC : 0);
+               hfs_lock(cp, HFS_FORCE_LOCK);
+
                 cp->c_flag &= ~C_ZFWANTSYNC;
                 cp->c_zftimeout = 0;
         }
+datasync:
+       if (took_trunc_lock)
+               hfs_unlock_truncate(cp, TRUE);
+       
+       /*
+        * if we have a journal and if journal_active() returns != 0 then
+        * we shouldn't do anything to a locked block (because it is part 
+        * of a transaction).  otherwise we'll just go through the normal 
+        * code path and flush the buffer.  note journal_active() can return
+        * -1 if the journal is invalid -- however we still need to skip any 
+        * locked blocks as they get cleaned up when we finish the transaction
+        * or close the journal.
+        */
+       // if (hfsmp->jnl && journal_active(hfsmp->jnl) >= 0)
+       if (hfsmp->jnl)
+               lockflag = BUF_SKIP_LOCKED;
+       else
+               lockflag = 0;
  
         /*
          * Flush all dirty buffers associated with a vnode.
          */
-loop:
-       s = splbio();
-       for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-               nbp = bp->b_vnbufs.le_next;
-               if ((bp->b_flags & B_BUSY))
-                       continue;
-               if ((bp->b_flags & B_DELWRI) == 0)
-                       panic("hfs_fsync: not dirty");
-               bremfree(bp);
-               bp->b_flags |= B_BUSY;
-               /* Clear B_LOCKED, should only be set on meta files */
-               bp->b_flags &= ~B_LOCKED;
-               splx(s);
-               /*
-                * Wait for I/O associated with indirect blocks to complete,
-                * since there is no way to quickly wait for them below.
-                */
-               if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
-                       (void) bawrite(bp);
-               else
-                       (void) VOP_BWRITE(bp);
-               goto loop;
-       }
-
-       if (wait) {
-               while (vp->v_numoutput) {
-                       vp->v_flag |= VBWAIT;
-                       tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "hfs_fsync", 0);
-               }
-
-               if (vp->v_dirtyblkhd.lh_first) {
-                       /* still have some dirty buffers */
-                       if (retry++ > 10) {
-                               vprint("hfs_fsync: dirty", vp);
-                               splx(s);
-                               /*
-                                * Looks like the requests are not
-                                * getting queued to the driver.
-                                * Retrying here causes a cpu bound loop.
-                                * Yield to the other threads and hope
-                                * for the best.
-                                */
-                               (void)tsleep((caddr_t)&vp->v_numoutput,
-                                       PRIBIO + 1, "hfs_fsync", hz/10);
-                               retry = 0;
-                       } else {
-                               splx(s);
-                       }
-                       /* try again */
-                       goto loop;
-               }
-       }
-       splx(s);
+       buf_flushdirtyblks(vp, wait, lockflag, "hfs_fsync");
  
  metasync:
-       tv = time;
-       if (vp->v_flag & VSYSTEM) {
-               if (VTOF(vp)->fcbBTCBPtr != NULL)
+       if (vnode_isreg(vp) && vnode_issystem(vp)) {
+               if (VTOF(vp)->fcbBTCBPtr != NULL) {
+                       microuptime(&tv);
                         BTSetLastSync(VTOF(vp), tv.tv_sec);
-               cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
-       } else /* User file */ {
-               retval = VOP_UPDATE(ap->a_vp, &tv, &tv, wait);
+               }
+               cp->c_touch_acctime = FALSE;
+               cp->c_touch_chgtime = FALSE;
+               cp->c_touch_modtime = FALSE;
+       } else if ( !(vp->v_flag & VSWAP) ) /* User file */ {
+               retval = hfs_update(vp, wait);
  
-               /* When MNT_WAIT is requested push out any delayed meta data */
-               if ((retval == 0) && wait && cp->c_hint &&
+               /*
+                * When MNT_WAIT is requested push out the catalog record for
+                * this file.  If they asked for a full fsync, we can skip this
+                * because the journal_flush or hfs_metasync_all will push out
+                * all of the metadata changes.
+                */
+               if ((retval == 0) && wait && !fullsync && cp->c_hint &&
                     !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) {
-                       hfs_metasync(VTOHFS(vp), cp->c_hint, ap->a_p);
-               }
+                       hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p);
+               }
+
+               /*
+                * If this was a full fsync, make sure all metadata
+                * changes get to stable storage.
+                */
+               if (fullsync) {
+                   if (hfsmp->jnl) {
+                       journal_flush(hfsmp->jnl);
+                   } else {
+                       retval = hfs_metasync_all(hfsmp);
+                       /* XXX need to pass context! */
+                       VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+                   }
+               }
         }
  
         return (retval);
  }
  
+
  /* Sync an hfs catalog b-tree node */
  static int
-hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p)
+hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p)
  {
-       struct vnode *vp;
-       struct buf *bp;
-       struct buf *nbp;
-       int s;
+       vnode_t vp;
+       buf_t   bp;
+       int lockflags;
  
         vp = HFSTOVCB(hfsmp)->catalogRefNum;
  
-       if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p) != 0)
-               return (0);
+       // XXXdbg - don't need to do this on a journaled volume
+       if (hfsmp->jnl) {
+               return 0;
+       }
  
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
         /*
          * Look for a matching node that has been delayed
          * but is not part of a set (B_LOCKED).
+        *
+        * BLK_ONLYVALID causes buf_getblk to return a
+        * buf_t for the daddr64_t specified only if it's
+        * currently resident in the cache... the size
+        * parameter to buf_getblk is ignored when this flag
+        * is set
          */
-       s = splbio();
-       for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-               nbp = bp->b_vnbufs.le_next;
-               if (bp->b_flags & B_BUSY)
-                       continue;
-               if (bp->b_lblkno == node) {
-                       if (bp->b_flags & B_LOCKED)
-                               break;
+       bp = buf_getblk(vp, node, 0, 0, 0, BLK_META | BLK_ONLYVALID);
  
-                       bremfree(bp);
-                       bp->b_flags |= B_BUSY;
-                       splx(s);
-                       (void) VOP_BWRITE(bp);
-                       goto exit;
-               }
+       if (bp) {
+               if ((buf_flags(bp) & (B_LOCKED | B_DELWRI)) == B_DELWRI)
+                       (void) VNOP_BWRITE(bp);
+               else
+                       buf_brelse(bp);
         }
-       splx(s);
-exit:
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
         return (0);
  }
  
+
+/*
+ * Sync all hfs B-trees.  Use this instead of journal_flush for a volume
+ * without a journal.  Note that the volume bitmap does not get written;
+ * we rely on fsck_hfs to fix that up (which it can do without any loss
+ * of data).
+ */
+static int
+hfs_metasync_all(struct hfsmount *hfsmp)
+{
+       int lockflags;
+
+       /* Lock all of the B-trees so we get a mutually consistent state */
+       lockflags = hfs_systemfile_lock(hfsmp,
+               SFL_CATALOG|SFL_EXTENTS|SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+       /* Sync each of the B-trees */
+       if (hfsmp->hfs_catalog_vp)
+               hfs_btsync(hfsmp->hfs_catalog_vp, 0);
+       if (hfsmp->hfs_extents_vp)
+               hfs_btsync(hfsmp->hfs_extents_vp, 0);
+       if (hfsmp->hfs_attribute_vp)
+               hfs_btsync(hfsmp->hfs_attribute_vp, 0);
+       
+       /* Wait for all of the writes to complete */
+       if (hfsmp->hfs_catalog_vp)
+               vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_metasync_all");
+       if (hfsmp->hfs_extents_vp)
+               vnode_waitforwrites(hfsmp->hfs_extents_vp, 0, 0, 0, "hfs_metasync_all");
+       if (hfsmp->hfs_attribute_vp)
+               vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs_metasync_all");
+
+       hfs_systemfile_unlock(hfsmp, lockflags);
+       
+       return 0;
+}
+
+
+/*ARGSUSED 1*/
+static int
+hfs_btsync_callback(struct buf *bp, __unused void *dummy)
+{
+       buf_clearflags(bp, B_LOCKED);
+       (void) buf_bawrite(bp);
+
+       return(BUF_CLAIMED);
+}
+
+
  __private_extern__
  int
  hfs_btsync(struct vnode *vp, int sync_transaction)
  {
         struct cnode *cp = VTOC(vp);
-       register struct buf *bp;
         struct timeval tv;
-       struct buf *nbp;
-       int s;
+       int    flags = 0;
  
+       if (sync_transaction)
+               flags |= BUF_SKIP_NONLOCKED;
         /*
          * Flush all dirty buffers associated with b-tree.
          */
-loop:
-       s = splbio();
-
-       for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
-               nbp = bp->b_vnbufs.le_next;
-               if ((bp->b_flags & B_BUSY))
-                       continue;
-               if ((bp->b_flags & B_DELWRI) == 0)
-                       panic("hfs_fsync: not dirty");
-               if (sync_transaction && !(bp->b_flags & B_LOCKED))
-                       continue;
-
-               bremfree(bp);
-               bp->b_flags |= B_BUSY;
-               bp->b_flags &= ~B_LOCKED;
-               splx(s);
-
-               (void) bawrite(bp);
+       buf_iterate(vp, hfs_btsync_callback, flags, 0);
  
-               goto loop;
-       }
-       splx(s);
-
-       tv = time;
-       if ((vp->v_flag & VSYSTEM) && (VTOF(vp)->fcbBTCBPtr != NULL))
+       microuptime(&tv);
+       if (vnode_issystem(vp) && (VTOF(vp)->fcbBTCBPtr != NULL))
                 (void) BTSetLastSync(VTOF(vp), tv.tv_sec);
-       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
+       cp->c_touch_acctime = FALSE;
+       cp->c_touch_chgtime = FALSE;
+       cp->c_touch_modtime = FALSE;
  
         return 0;
  }
  
  /*
- * Rmdir system call.
-#% rmdir       dvp     L U U
-#% rmdir       vp      L U U
-#
- vop_rmdir {
-     IN WILLRELE struct vnode *dvp;
-     IN WILLRELE struct vnode *vp;
-     IN struct componentname *cnp;
-
+ * Remove a directory.
   */
  static int
-hfs_rmdir(ap)
-       struct vop_rmdir_args /* {
+hfs_vnop_rmdir(ap)
+       struct vnop_rmdir_args /* {
                 struct vnode *a_dvp;
                 struct vnode *a_vp;
                 struct componentname *a_cnp;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       struct vnode *vp = ap->a_vp;
         struct vnode *dvp = ap->a_dvp;
-       struct proc *p = ap->a_cnp->cn_proc;
+       struct vnode *vp = ap->a_vp;
+       struct cnode *dcp = VTOC(dvp);
+       struct cnode *cp = VTOC(vp);
+       int error;
+
+       if (!S_ISDIR(cp->c_mode)) {
+               return (ENOTDIR);
+       }
+       if (dvp == vp) {
+               return (EINVAL);
+       }
+       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+               return (error);
+       }
+       error = hfs_removedir(dvp, vp, ap->a_cnp, 0);
+       hfs_unlockpair(dcp, cp);
+
+       return (error);
+}
+
+/*
+ * Remove a directory
+ *
+ * Both dvp and vp cnodes are locked
+ */
+static int
+hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
+              int skip_reserve)
+{
         struct cnode *cp;
         struct cnode *dcp;
         struct hfsmount * hfsmp;
-       struct timeval tv;
-       int error = 0;
+       struct cat_desc desc;
+       int lockflags;
+       int error = 0, started_tr = 0;
  
         cp = VTOC(vp);
         dcp = VTOC(dvp);
         hfsmp = VTOHFS(vp);
  
         if (dcp == cp) {
-               vrele(dvp);
-               vput(vp);
                 return (EINVAL);        /* cannot remove "." */
         }
+       if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
+               return (0);
+       }
+       if (cp->c_entries != 0) {
+               return (ENOTEMPTY);
+       }
+
+       /* Check if we're removing the last link to an empty directory. */
+       if (cp->c_flag & C_HARDLINK) {
+               /* We could also return EBUSY here */
+               return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve);
+       }
+
+       if ((hfsmp->hfs_attribute_vp != NULL) &&
+           (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
+
+           return hfs_removefile(dvp, vp, cnp, 0, 0, 1);
+       }
+
+       dcp->c_flag |= C_DIR_MODIFICATION;
+
+#if QUOTA
+       if (hfsmp->hfs_flags & HFS_QUOTAS)
+               (void)hfs_getinoquota(cp);
+#endif
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto out;
+       }
+       started_tr = 1;
+
         /*
          * Verify the directory is empty (and valid).
          * (Rmdir ".." won't be valid since
@@ -1334,10 +1676,6 @@ hfs_rmdir(ap)
          *  the current directory and thus be
          *  non-empty.)
          */
-       if (cp->c_entries != 0) {
-               error = ENOTEMPTY;
-               goto out;
-       }
         if ((dcp->c_flags & APPEND) || (cp->c_flags & (IMMUTABLE | APPEND))) {
                 error = EPERM;
                 goto out;
@@ -1346,213 +1684,387 @@ hfs_rmdir(ap)
         /* Remove the entry from the namei cache: */
         cache_purge(vp);
  
-       /* Lock catalog b-tree */
-       error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
-       if (error) goto out;
+       /* 
+        * Protect against a race with rename by using the component
+        * name passed in and parent id from dvp (instead of using 
+        * the cp->c_desc which may have changed).
+        */
+       desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
+       desc.cd_namelen = cnp->cn_namelen;
+       desc.cd_parentcnid = dcp->c_fileid;
+       desc.cd_cnid = cp->c_cnid;
+       desc.cd_flags = CD_ISDIR;
+       desc.cd_encoding = cp->c_encoding;
+       desc.cd_hint = 0;
+
+       if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) {
+           error = 0;
+           goto out;
+       }
  
-       if (cp->c_entries > 0)
-               panic("hfs_rmdir: attempting to delete a non-empty directory!");
         /* Remove entry from catalog */
-       error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+       if (!skip_reserve) {
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) {
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       goto out;
+               }
+       }
+
+       error = cat_delete(hfsmp, &desc, &cp->c_attr);
+       if (error == 0) {
+               /* The parent lost a child */
+               if (dcp->c_entries > 0)
+                       dcp->c_entries--;
+               DEC_FOLDERCOUNT(hfsmp, dcp->c_attr);
+               dcp->c_dirchangecnt++;
+               dcp->c_touch_chgtime = TRUE;
+               dcp->c_touch_modtime = TRUE;
+               hfs_touchtimes(hfsmp, cp);
+               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+               cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+       }
+
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
-       /* Unlock catalog b-tree */
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-       if (error) goto out;
+       if (error)
+               goto out;
  
  #if QUOTA
-       if (!hfs_getinoquota(cp))
+       if (hfsmp->hfs_flags & HFS_QUOTAS)
                 (void)hfs_chkiq(cp, -1, NOCRED, 0);
  #endif /* QUOTA */
  
-       /* The parent lost a child */
-       if (dcp->c_entries > 0)
-               dcp->c_entries--;
-       if (dcp->c_nlink > 0)
-               dcp->c_nlink--;
-       dcp->c_flag |= C_CHANGE | C_UPDATE;
-       tv = time;
-       (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK | NOTE_ATTRIB);
+
         hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID));
  
-       cp->c_mode = 0;  /* Makes the vnode go away...see inactive */
-       cp->c_flag |= C_NOEXISTS;
+       /*
+        * directory open or in use (e.g. opendir() or current working
+        * directory for some process); wait for inactive to actually
+        * remove catalog entry
+        */
+       if (vnode_isinuse(vp, 0)) {
+               cp->c_flag |= C_DELETED;
+       } else {
+               cp->c_mode = 0;  /* Makes the vnode go away...see inactive */
+               cp->c_flag |= C_NOEXISTS;
+       }
  out:
-       if (dvp) 
-               vput(dvp);
-       vput(vp);
-       return (error);
-}
+       dcp->c_flag &= ~C_DIR_MODIFICATION;
+       wakeup((caddr_t)&dcp->c_flag);
  
-/*
+       HFS_KNOTE(vp, NOTE_DELETE);
+
+       if (started_tr) { 
+           hfs_end_transaction(hfsmp);
+       }
  
-#% remove      dvp     L U U
-#% remove      vp      L U U
-#
- vop_remove {
-     IN WILLRELE struct vnode *dvp;
-     IN WILLRELE struct vnode *vp;
-     IN struct componentname *cnp;
+       return (error);
+}
  
-     */
  
+/*
+ * Remove a file or link.
+ */
  static int
-hfs_remove(ap)
-       struct vop_remove_args /* {
+hfs_vnop_remove(ap)
+       struct vnop_remove_args /* {
                 struct vnode *a_dvp;
                 struct vnode *a_vp;
                 struct componentname *a_cnp;
+               int a_flags;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       struct vnode *vp = ap->a_vp;
         struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp = ap->a_vp;
+       struct cnode *dcp = VTOC(dvp);
+       struct cnode *cp = VTOC(vp);
+       struct vnode *rvp = cp->c_rsrc_vp;
+       int error=0, recycle_rsrc=0, rvid=0;
+
+       if (dvp == vp) {
+               return (EINVAL);
+       }
+
+       hfs_lock_truncate(cp, TRUE);
+
+       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+               hfs_unlock_truncate(cp, TRUE);
+               return (error);
+       }
+       error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0);
+
+       //
+       // If the remove succeeded and it's an open-unlinked file that has
+       // a resource fork vnode that's not in use, we will want to recycle
+       // the rvp *after* we're done unlocking everything.  Otherwise the
+       // resource vnode will keep a v_parent reference on this vnode which
+       // prevents it from going through inactive/reclaim which means that
+       // the disk space associated with this file won't get freed until
+       // something forces the resource vnode to get recycled (and that can
+       // take a very long time).
+       //
+       if (error == 0 && (cp->c_flag & C_DELETED) && rvp && !vnode_isinuse(rvp, 0)) {
+           rvid = vnode_vid(rvp);
+           recycle_rsrc = 1;
+       }
+
+       /*
+        * Drop the truncate lock before unlocking the cnode
+        * (which can potentially perform a vnode_put and
+        * recycle the vnode which in turn might require the
+        * truncate lock)
+        */
+       hfs_unlock_truncate(cp, TRUE);
+       hfs_unlockpair(dcp, cp);
+
+       if (recycle_rsrc && vnode_getwithvid(rvp, rvid) == 0) {
+               vnode_ref(rvp);
+               vnode_rele(rvp);
+               vnode_recycle(rvp);
+               vnode_put(rvp);
+       } 
+       
+       return (error);
+}
+
+
+static int
+hfs_removefile_callback(struct buf *bp, void *hfsmp) {
+
+        if ( !(buf_flags(bp) & B_META))
+               panic("hfs: symlink bp @ %p is not marked meta-data!\n", bp);
+       /*
+        * it's part of the current transaction, kill it.
+        */
+       journal_kill_block(((struct hfsmount *)hfsmp)->jnl, bp);
+
+       return (BUF_CLAIMED);
+}
+
+/*
+ * hfs_removefile
+ *
+ * Similar to hfs_vnop_remove except there are additional options.
+ *
+ * Requires cnode and truncate locks to be held.
+ */
+static int
+hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
+               int flags, int skip_reserve, int allow_dirs)
+{
         struct vnode *rvp = NULL;
         struct cnode *cp;
         struct cnode *dcp;
         struct hfsmount *hfsmp;
-       struct proc *p = current_proc();
+       struct cat_desc desc;
+       struct timeval tv;
+       vfs_context_t ctx = cnp->cn_context;
         int dataforkbusy = 0;
         int rsrcforkbusy = 0;
         int truncated = 0;
-       struct timeval tv;
+       int lockflags;
         int error = 0;
-
-       /* Redirect directories to rmdir */
-       if (vp->v_type == VDIR)
-               return (hfs_rmdir(ap));
+       int started_tr = 0;
+       int isbigfile = 0, defer_remove=0, isdir=0;
  
         cp = VTOC(vp);
         dcp = VTOC(dvp);
         hfsmp = VTOHFS(vp);
-       
-       if (cp->c_parentcnid != dcp->c_cnid) {
-               error = EINVAL;
-               goto out;
+
+       /* Check if we lost a race post lookup. */
+       if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
+               return (0);
         }
  
-       /* Make sure a remove is permitted */
-       if ((cp->c_flags & (IMMUTABLE | APPEND)) ||
-           (VTOC(dvp)->c_flags & APPEND) ||
-           VNODE_IS_RSRC(vp)) {
-               error = EPERM;
-               goto out;
-        }
+       if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) {
+           return 0;
+       }
  
+       /* Make sure a remove is permitted */
+       if (VNODE_IS_RSRC(vp)) {
+               return (EPERM);
+       }
+       /* Don't allow deleting the journal or journal_info_block. */
+       if (hfsmp->jnl &&
+           (cp->c_fileid == hfsmp->hfs_jnlfileid || cp->c_fileid == hfsmp->hfs_jnlinfoblkid)) {
+               return (EPERM);
+       }
         /*
-        * Aquire a vnode for a non-empty resource fork.
-        * (needed for VOP_TRUNCATE)
+        * Hard links require special handling.
          */
-       if (cp->c_blocks - VTOF(vp)->ff_blocks) {
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, p);
-               if (error)
-                       goto out;
+       if (cp->c_flag & C_HARDLINK) {
+               if ((flags & VNODE_REMOVE_NODELETEBUSY) && vnode_isinuse(vp, 0)) {
+                       return (EBUSY);
+               } else {
+                       /* A directory hard link with a link count of one is 
+                        * treated as a regular directory.  Therefore it should 
+                        * only be removed using rmdir().
+                        */
+                       if ((vnode_isdir(vp) == 1) && (cp->c_linkcount == 1) && 
+                           (allow_dirs == 0)) {
+                               return (EPERM);
+                       }
+                       return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve);
+               }
+       }
+       /* Directories should call hfs_rmdir! (unless they have a lot of attributes) */
+       if (vnode_isdir(vp)) {
+               if (allow_dirs == 0)
+                       return (EPERM);  /* POSIX */
+               isdir = 1;
+       }
+       /* Sanity check the parent ids. */
+       if ((cp->c_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
+           (cp->c_parentcnid != dcp->c_fileid)) {
+               return (EINVAL);
         }
  
+       dcp->c_flag |= C_DIR_MODIFICATION;
+
+       // this guy is going away so mark him as such
+       cp->c_flag |= C_DELETED;
+
+
+       /* Remove our entry from the namei cache. */
+       cache_purge(vp);
+
         /*
-        * Check if this file is being used.
-        *
-        * The namei done for the remove took a reference on the
-        * vnode (vp).  And we took a ref on the resource vnode (rvp).
-        * Hence set 1 in the tookref parameter of ubc_isinuse().
+        * Acquire a vnode for a non-empty resource fork.
+        * (needed for hfs_truncate)
          */
-       if (UBCISVALID(vp) && ubc_isinuse(vp, 1))
-               dataforkbusy = 1;
-       if (rvp && UBCISVALID(rvp) && ubc_isinuse(rvp, 1))
-               rsrcforkbusy = 1;
+       if (isdir == 0 && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
+               /*
+                * We must avoid calling hfs_vgetrsrc() when we have
+                * an active resource fork vnode to avoid deadlocks
+                * when that vnode is in the VL_TERMINATE state. We
+                * can defer removing the file and its resource fork
+                * until the call to hfs_vnop_inactive() occurs.
+                */
+               if (cp->c_rsrc_vp) {
+                       defer_remove = 1;
+               } else {
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+                       if (error)
+                               goto out;
+                       /* Defer the vnode_put on rvp until the hfs_unlock(). */
+                       cp->c_flag |= C_NEED_RVNODE_PUT;
+               }
+       }
+       /* Check if this file is being used. */
+       if (isdir == 0) {
+               dataforkbusy = vnode_isinuse(vp, 0);
+               rsrcforkbusy = rvp ? vnode_isinuse(rvp, 0) : 0;
+       }
+       
+       /* Check if we have to break the deletion into multiple pieces. */
+       if (isdir == 0) {
+               isbigfile = ((cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE) && overflow_extents(VTOF(vp)));
+       }
+
+       /* Check if the file has xattrs.  If it does we'll have to delete them in
+          individual transactions in case there are too many */
+       if ((hfsmp->hfs_attribute_vp != NULL) &&
+           (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
+           defer_remove = 1;
+       }
  
         /*
          * Carbon semantics prohibit deleting busy files.
-        * (enforced when NODELETEBUSY is requested)
+        * (enforced when VNODE_REMOVE_NODELETEBUSY is requested)
          */
-       if ((dataforkbusy || rsrcforkbusy) &&
-           ((ap->a_cnp->cn_flags & NODELETEBUSY) ||
-            (hfsmp->hfs_private_metadata_dir == 0))) {
-               error = EBUSY;
-               goto out;
+       if (dataforkbusy || rsrcforkbusy) {
+               if ((flags & VNODE_REMOVE_NODELETEBUSY) ||
+                   (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid == 0)) {
+                       error = EBUSY;
+                       goto out;
+               }
         }
  
-       /* Remove our entry from the namei cache. */
-       cache_purge(vp);
+#if QUOTA
+       if (hfsmp->hfs_flags & HFS_QUOTAS)
+               (void)hfs_getinoquota(cp);
+#endif /* QUOTA */
+
+       /* Check if we need a ubc_setsize. */
+       if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy)) {
+               /*
+                * A ubc_setsize can cause a pagein so defer it
+                * until after the cnode lock is dropped.  The
+                * cnode lock cannot be dropped/reacquired here
+                * since we might already hold the journal lock.
+                */
+               if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) {
+                       cp->c_flag |= C_NEED_DATA_SETSIZE;
+               }
+               if (!rsrcforkbusy && rvp) {
+                       cp->c_flag |= C_NEED_RSRC_SETSIZE;
+               }
+       }
+
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto out;
+       }
+       started_tr = 1;
+
+       // XXXdbg - if we're journaled, kill any dirty symlink buffers 
+       if (hfsmp->jnl && vnode_islnk(vp))
+               buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
  
         /*
          * Truncate any non-busy forks.  Busy forks will
-        * get trucated when their vnode goes inactive.
+        * get truncated when their vnode goes inactive.
          *
-        * (Note: hard links are truncated in VOP_INACTIVE)
+        * Since we're already inside a transaction,
+        * tell hfs_truncate to skip the ubc_setsize.
          */
-       if ((cp->c_flag & C_HARDLINK) == 0) {
+       if (isdir == 0) {
                 int mode = cp->c_mode;
  
-               if (!dataforkbusy && cp->c_datafork->ff_blocks != 0) {
-                       cp->c_mode = 0;  /* Suppress VOP_UPDATES */
-                       error = VOP_TRUNCATE(vp, (off_t)0, IO_NDELAY, NOCRED, p);
+               if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) {
+                       cp->c_mode = 0;  /* Suppress hfs_update */
+                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ctx);
                         cp->c_mode = mode;
                         if (error)
                                 goto out;
                         truncated = 1;
                 }
                 if (!rsrcforkbusy && rvp) {
-                       cp->c_mode = 0;            /* Suppress VOP_UPDATES */
-                       error = VOP_TRUNCATE(rvp, (off_t)0, IO_NDELAY, NOCRED, p);
+                       cp->c_mode = 0;  /* Suppress hfs_update */
+                       error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ctx);
                         cp->c_mode = mode;
-                       if (error && !dataforkbusy)
+                       if (error)
                                 goto out;
-                       else {
-                               /*
-                                * XXX could also force an update on vp
-                                * and fail the remove.
-                                */
-                               error = 0;
-                       }
-                       truncated = 1;
-               }
-       }
-       /*
-        * There are 3 remove cases to consider:
-        *   1. File is a hardlink    ==> remove the link
-        *   2. File is busy (in use) ==> move/rename the file
-        *   3. File is not in use    ==> remove the file
-        */
-
-       if (cp->c_flag & C_HARDLINK) {
-               struct cat_desc desc;
-
-               if ((ap->a_cnp->cn_flags & HASBUF) == 0 ||
-                   ap->a_cnp->cn_nameptr[0] == '\0') {
-                       error = ENOENT; /* name missing! */
-                       goto out;
+                       truncated = 1;
                 }
+       }
  
-               /* Setup a descriptor for the link */
-               bzero(&desc, sizeof(desc));
-               desc.cd_nameptr = ap->a_cnp->cn_nameptr;
-               desc.cd_namelen = ap->a_cnp->cn_namelen;
-               desc.cd_parentcnid = dcp->c_cnid;
-               /* XXX - if cnid is out of sync then the wrong thread rec will get deleted. */
-               desc.cd_cnid = cp->c_cnid;
-
-               /* Lock catalog b-tree */
-               error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
-               if (error)
-                       goto out;
-
-               error = cat_delete(hfsmp, &desc, &cp->c_attr);
-
-               /* Unlock the Catalog */
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-
-               /* All done with component name... */
-               if ((ap->a_cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME))
-                       FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-
-               if (error != 0)
-                       goto out;
-
-               cp->c_flag |= C_CHANGE;
-                if (--cp->c_nlink < 1)
-                       cp->c_flag |= C_DELETED;
-               hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
+       /* 
+        * Protect against a race with rename by using the component
+        * name passed in and parent id from dvp (instead of using 
+        * the cp->c_desc which may have changed).  
+        */
+       desc.cd_flags = 0;
+       desc.cd_encoding = cp->c_desc.cd_encoding;
+       desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
+       desc.cd_namelen = cnp->cn_namelen;
+       desc.cd_parentcnid = dcp->c_fileid;
+       desc.cd_hint = cp->c_desc.cd_hint;
+       desc.cd_cnid = cp->c_cnid;
+       microtime(&tv);
  
-       } else if (dataforkbusy || rsrcforkbusy) {
+       /*
+        * There are two cases to consider:
+        *  1. File is busy/big/defer_remove ==> move/rename the file
+        *  2. File is not in use ==> remove the file
+        */
+       if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) {
                 char delname[32];
                 struct cat_desc to_desc;
                 struct cat_desc todir_desc;
@@ -1563,101 +2075,148 @@ hfs_remove(ap)
                 bzero(&todir_desc, sizeof(todir_desc));
                 todir_desc.cd_parentcnid = 2;
  
-               MAKE_DELETED_NAME(delname, cp->c_fileid);               
+               MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid);
                 bzero(&to_desc, sizeof(to_desc));
-               to_desc.cd_nameptr = delname;
+               to_desc.cd_nameptr = (const u_int8_t *)delname;
                 to_desc.cd_namelen = strlen(delname);
-               to_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir;
+               to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
                 to_desc.cd_flags = 0;
                 to_desc.cd_cnid = cp->c_cnid;
  
-               /* Lock catalog b-tree */
-               error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
-               if (error) goto out;
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+               if (!skip_reserve) {
+                       if ((error = cat_preflight(hfsmp, CAT_RENAME, NULL, 0))) {
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               goto out;
+                       }
+               }
  
-               error = cat_rename(hfsmp, &cp->c_desc, &todir_desc,
+               error = cat_rename(hfsmp, &desc, &todir_desc,
                                 &to_desc, (struct cat_desc *)NULL);
  
-               hfsmp->hfs_privdir_attr.ca_entries++;
-               (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc,
-                               &hfsmp->hfs_privdir_attr, NULL, NULL);
-
-               /* Unlock the Catalog */
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-               if (error) goto out;
+               if (error == 0) {
+                       hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries++;
+                       if (isdir == 1) {
+                               INC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+                       }
+                       (void) cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+                                         &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+
+                       /* Update the parent directory */
+                       if (dcp->c_entries > 0)
+                               dcp->c_entries--;
+                       if (isdir == 1) {
+                               DEC_FOLDERCOUNT(hfsmp, dcp->c_attr);
+                       }
+                       dcp->c_dirchangecnt++;
+                       dcp->c_ctime = tv.tv_sec;
+                       dcp->c_mtime = tv.tv_sec;
+                       (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
  
-               cp->c_flag |= C_CHANGE | C_DELETED | C_NOEXISTS;
-               --cp->c_nlink;
+                       /* Update the file's state */
+                       cp->c_flag |= C_DELETED;
+                       cp->c_ctime = tv.tv_sec;
+                       --cp->c_linkcount;
+                       (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL);
+               }
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               if (error)
+                       goto out;
  
         } else /* Not busy */ {
  
-               /* Lock catalog b-tree */
-               error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
-               if (error) goto out;
-
-               if (vp->v_type == VDIR && cp->c_entries > 0)
-                       panic("hfs_remove: attempting to delete a non-empty directory!");
-               if (vp->v_type != VDIR && cp->c_blocks > 0)
-                       panic("hfs_remove: attempting to delete a non-empty file!");
+               if (cp->c_blocks > 0) {
+                       printf("hfs_remove: attempting to delete a non-empty file %s\n",
+                               cp->c_desc.cd_nameptr);
+                       error = EBUSY;
+                       goto out;
+               }
  
-               error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+               if (!skip_reserve) {
+                       if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) {
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               goto out;
+                       }
+               }
  
-               if (error && truncated)
-                       panic("hfs_remove: couldn't delete a truncated file!");
+               error = cat_delete(hfsmp, &desc, &cp->c_attr);
  
-               /* Unlock the Catalog */
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-               if (error) goto out;
+               if (error && error != ENXIO && error != ENOENT && truncated) {
+                       if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
+                               (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
+                               panic("hfs: remove: couldn't delete a truncated file! (%d, data sz %lld; rsrc sz %lld)",
+                                         error, cp->c_datafork->ff_size, cp->c_rsrcfork->ff_size);
+                       } else {
+                               printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
+                                          cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error);
+                       }
+               }
+               if (error == 0) {
+                       /* Update the parent directory */
+                       if (dcp->c_entries > 0)
+                               dcp->c_entries--;
+                       dcp->c_dirchangecnt++;
+                       dcp->c_ctime = tv.tv_sec;
+                       dcp->c_mtime = tv.tv_sec;
+                       (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+               }
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               if (error)
+                       goto out;
  
  #if QUOTA
-               if (!hfs_getinoquota(cp))
+               if (hfsmp->hfs_flags & HFS_QUOTAS)
                         (void)hfs_chkiq(cp, -1, NOCRED, 0);
  #endif /* QUOTA */
  
                 cp->c_mode = 0;
-               cp->c_flag |= C_CHANGE | C_NOEXISTS;
-               --cp->c_nlink;
+               truncated  = 0;    // because the catalog entry is gone
+               cp->c_flag |= C_NOEXISTS;
+               cp->c_flag &= ~C_DELETED;
+               cp->c_touch_chgtime = TRUE;   /* XXX needed ? */
+               --cp->c_linkcount;
+
                 hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
         }
  
         /*
          * All done with this cnode's descriptor...
          *
-        * Note: all future catalog calls for this cnode must be
-        * by fileid only.  This is OK for HFS (which doesn't have
-        * file thread records) since HFS doesn't support hard
-        * links or the removal of busy files.
+        * Note: all future catalog calls for this cnode must be by
+        * fileid only.  This is OK for HFS (which doesn't have file
+        * thread records) since HFS doesn't support the removal of
+        * busy files.
          */
         cat_releasedesc(&cp->c_desc);
  
-       /* In all three cases the parent lost a child */
-       if (dcp->c_entries > 0)
-               dcp->c_entries--;
-       if (dcp->c_nlink > 0)
-               dcp->c_nlink--;
-       dcp->c_flag |= C_CHANGE | C_UPDATE;
-       tv = time;
-       (void) VOP_UPDATE(dvp, &tv, &tv, 0);
-
-       if (rvp)
-               vrele(rvp);
-       VOP_UNLOCK(vp, 0, p);
-       (void) ubc_uncache(vp);
-       vrele(vp);
-       vput(dvp);
-       return (0);
+       HFS_KNOTE(dvp, NOTE_WRITE);
+
  out:
-       if (rvp)
-               vrele(rvp);
-       
+       if (error) {
+           cp->c_flag &= ~C_DELETED;
+       }
+
         /* Commit the truncation to the catalog record */
         if (truncated) {
-               cp->c_flag |= C_CHANGE | C_UPDATE;
-               tv = time;
-               (void) VOP_UPDATE(vp, &tv, &tv, 0);
+           cp->c_flag |= C_FORCEUPDATE;
+           cp->c_touch_chgtime = TRUE;
+           cp->c_touch_modtime = TRUE;
+           (void) hfs_update(vp, 0);
         }
-       vput(vp);
-       vput(dvp);
+
+       if (started_tr) {
+           hfs_end_transaction(hfsmp);
+       }
+
+       dcp->c_flag &= ~C_DIR_MODIFICATION;
+       wakeup((caddr_t)&dcp->c_flag);
+
+       HFS_KNOTE(vp, NOTE_DELETE);
+       if (rvp) {
+               HFS_KNOTE(rvp, NOTE_DELETE);
+       }
+
         return (error);
  }
  
@@ -1665,14 +2224,19 @@ out:
  __private_extern__ void
  replace_desc(struct cnode *cp, struct cat_desc *cdp)
  {
+       // fixes 4348457 and 4463138
+       if (&cp->c_desc == cdp) {
+           return;
+       }
+
         /* First release allocated name buffer */
         if (cp->c_desc.cd_flags & CD_HASBUF && cp->c_desc.cd_nameptr != 0) {
-               char *name = cp->c_desc.cd_nameptr;
+               const u_int8_t *name = cp->c_desc.cd_nameptr;
  
                 cp->c_desc.cd_nameptr = 0;
                 cp->c_desc.cd_namelen = 0;
                 cp->c_desc.cd_flags &= ~CD_HASBUF;
-               FREE(name, M_TEMP);
+               vfs_removename((const char *)name);
         }
         bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc));
  
@@ -1683,660 +2247,936 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
  }
  
  
-/*
-#
-#% rename      fdvp    U U U
-#% rename      fvp     U U U
-#% rename      tdvp    L U U
-#% rename      tvp     X U U
-#
-       vop_rename {
-               IN WILLRELE struct vnode *fdvp;
-               IN WILLRELE struct vnode *fvp;
-               IN struct componentname *fcnp;
-               IN WILLRELE struct vnode *tdvp;
-               IN WILLRELE struct vnode *tvp;
-               IN struct componentname *tcnp;
-       };
-*/
  /*
   * Rename a cnode.
   *
- * The VFS layer guarantees that source and destination will
- * either both be directories, or both not be directories.
+ * The VFS layer guarantees that:
+ *   - source and destination will either both be directories, or
+ *     both not be directories.
+ *   - all the vnodes are from the same file system
   *
- * When the target is a directory, hfs_rename must ensure
- * that it is empty.
+ * When the target is a directory, HFS must ensure that it is empty.
   */
-
  static int
-hfs_rename(ap)
-       struct vop_rename_args  /* {
+hfs_vnop_rename(ap)
+       struct vnop_rename_args  /* {
                 struct vnode *a_fdvp;
                 struct vnode *a_fvp;
                 struct componentname *a_fcnp;
                 struct vnode *a_tdvp;
                 struct vnode *a_tvp;
                 struct componentname *a_tcnp;
+               vfs_context_t a_context;
         } */ *ap;
  {
         struct vnode *tvp = ap->a_tvp;
         struct vnode *tdvp = ap->a_tdvp;
         struct vnode *fvp = ap->a_fvp;
         struct vnode *fdvp = ap->a_fdvp;
+       struct vnode *rvp = NULLVP;
         struct componentname *tcnp = ap->a_tcnp;
         struct componentname *fcnp = ap->a_fcnp;
-       struct cnode *fcp = NULL;
-       struct cnode *fdcp = NULL;
-       struct cnode *tdcp = NULL;
-       struct cnode *tcp = NULL;
+       struct proc *p = vfs_context_proc(ap->a_context);
+       struct cnode *fcp;
+       struct cnode *fdcp;
+       struct cnode *tdcp;
+       struct cnode *tcp;
         struct cat_desc from_desc;
         struct cat_desc to_desc;
         struct cat_desc out_desc;
         struct hfsmount *hfsmp;
-       struct proc *p = fcnp->cn_proc;
-       struct timeval tv;
-       int retval = 0;
-       cnid_t oldparent = 0;
-       cnid_t newparent = 0;
+       cat_cookie_t cookie;
+       int tvp_deleted = 0;
+       int started_tr = 0, got_cookie = 0;
+       int took_trunc_lock = 0;
+       int lockflags;
+       int error;
+       int rsrc_vid = 0;
+       int recycle_rsrc = 0;
+       
+       /* When tvp exists, take the truncate lock for the hfs_removefile() call. */
+       if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) {
+               hfs_lock_truncate(VTOC(tvp), TRUE);
+               took_trunc_lock = 1;
+       }
+
+  retry:
+       error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL,
+                            HFS_EXCLUSIVE_LOCK);
+       if (error) {
+               if (took_trunc_lock)
+                       hfs_unlock_truncate(VTOC(tvp), TRUE);   
+               return (error);
+       }
+
+       fdcp = VTOC(fdvp);
+       fcp = VTOC(fvp);
+       tdcp = VTOC(tdvp);
+       tcp = tvp ? VTOC(tvp) : NULL;
+       hfsmp = VTOHFS(tdvp);
+
+       /* Check for a race against unlink. */
+       if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid)) {
+               error = ENOENT;
+               goto out;
+       }
+
+       if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid))) {
+           //
+           // hmm, the destination vnode isn't valid any more.
+           // in this case we can just drop him and pretend he
+           // never existed in the first place.
+           //
+           if (took_trunc_lock) {
+               hfs_unlock_truncate(VTOC(tvp), TRUE);
+               took_trunc_lock = 0;
+           }
+
+           hfs_unlockfour(fdcp, fcp, tdcp, tcp);
+
+           tcp = NULL;
+           tvp = NULL;
+           
+           // retry the locking with tvp null'ed out
+           goto retry;
+       }
+
+       fdcp->c_flag |= C_DIR_MODIFICATION;
+       if (fdvp != tdvp) {
+           tdcp->c_flag |= C_DIR_MODIFICATION;
+       }
  
-#if HFS_DIAGNOSTIC
-    if ((tcnp->cn_flags & HASBUF) == 0 ||
-        (fcnp->cn_flags & HASBUF) == 0)
-        panic("hfs_rename: no name");
-#endif
         /*
-        * When fvp matches tvp they must be case variants
-        * or hard links, and if they are in the same directory then
-        * tvp really doesn't exist (see VFS rename).
-        * XXX Hard link rename is still broken/ignored.  If they are
-        * in different directories then we must have hard links.
-        * Comments further down describe behaviour of hard links in same dir.
-        * Note case insensitivity was and still is presumed.
+        * Disallow renaming of a directory hard link if the source and 
+        * destination parent directories are different, or a directory whose 
+        * descendant is a directory hard link and one of the ancestors
+        * of the destination directory is a directory hard link.
          */
-       if (fvp == tvp) {
-               if (fdvp != tdvp) {
-                       retval = 0;
-                       goto abortop;
+       if (vnode_isdir(fvp) && (fdvp != tdvp)) {
+               if (fcp->c_flag & C_HARDLINK) {
+                       error = EPERM;
+                       goto out;
+               }
+               if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) {
+                   lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                   if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) {
+                               error = EPERM;
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               goto out;
+                       }
+                       hfs_systemfile_unlock(hfsmp, lockflags);
                 }
-               tvp = NULL;
         }
-        
+
         /*
-        * Check for cross-device rename.
+        * The following edge case is caught here:
+        * ("to" cannot be a descendant of "from")
+        *
+        *       o fdvp
+        *      /
+        *     /
+        *    o fvp
+        *     \
+        *      \
+        *       o tdvp
+        *      /
+        *     /
+        *    o tvp
          */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
-               retval = EXDEV;
-               goto abortop;
+       if (tdcp->c_parentcnid == fcp->c_fileid) {
+               error = EINVAL;
+               goto out;
         }
  
         /*
-        * Make sure a remove of "to" vnode is permitted.
+        * The following two edge cases are caught here:
+        * (note tvp is not empty)
+        *
+        *       o tdvp               o tdvp
+        *      /                    /
+        *     /                    /
+        *    o tvp            tvp o fdvp
+        *     \                    \
+        *      \                    \
+        *       o fdvp               o fvp
+        *      /
+        *     /
+        *    o fvp
          */
-       if (tvp && ((VTOC(tvp)->c_flags & (IMMUTABLE | APPEND)) ||
-           (VTOC(tdvp)->c_flags & APPEND))) {
-               retval = EPERM;
-               goto abortop;
+       if (tvp && vnode_isdir(tvp) && (tcp->c_entries != 0) && fvp != tvp) {
+               error = ENOTEMPTY;
+               goto out;
         }
  
-       if ((retval = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)))
-               goto abortop;
+       /*
+        * The following edge case is caught here:
+        * (the from child and parent are the same)
+        *
+        *          o tdvp
+        *         /
+        *        /
+        *  fdvp o fvp
+        */
+       if (fdvp == fvp) {
+               error = EINVAL;
+               goto out;
+       }
  
         /*
          * Make sure "from" vnode and its parent are changeable.
          */
-       fdcp = VTOC(fdvp);
-       fcp = VTOC(fvp);
-       oldparent = fdcp->c_cnid;
         if ((fcp->c_flags & (IMMUTABLE | APPEND)) || (fdcp->c_flags & APPEND)) {
-               VOP_UNLOCK(fvp, 0, p);
-               retval = EPERM;
-               goto abortop;
+               error = EPERM;
+               goto out;
         }
  
-       if (fcp->c_parentcnid != fdcp->c_cnid) {
-               VOP_UNLOCK(fvp, 0, p);
-               retval = EINVAL;
-               goto abortop;
+       /*
+        * If the destination parent directory is "sticky", then the
+        * user must own the parent directory, or the destination of
+        * the rename, otherwise the destination may not be changed
+        * (except by root). This implements append-only directories.
+        *
+        * Note that checks for immutable and write access are done
+        * by the call to hfs_removefile.
+        */
+       if (tvp && (tdcp->c_mode & S_ISTXT) &&
+           (suser(vfs_context_ucred(tcnp->cn_context), NULL)) &&
+           (kauth_cred_getuid(vfs_context_ucred(tcnp->cn_context)) != tdcp->c_uid) &&
+           (hfs_owner_rights(hfsmp, tcp->c_uid, vfs_context_ucred(tcnp->cn_context), p, false)) ) {
+               error = EPERM;
+               goto out;
         }
  
+#if QUOTA
+       if (tvp)
+               (void)hfs_getinoquota(tcp);
+#endif
+       /* Preflighting done, take fvp out of the name space. */
+       cache_purge(fvp);
+
         /*
-        * Check if names already match...
-        * XXX The name being checked is from fcp rather than fcnp!  If
-        * there are hard links, fcp yields the name which was
-        * most recently looked up (yes that design is vulnerable to races)
-        * and the name most recently looked up was the target, so they
-        * compare equal and we ignore the rename.  XXX
+        * When a file moves out of "Cleanup At Startup"
+        * we can drop its NODUMP status.
          */
-       if (fvp == ap->a_tvp &&
-           (bcmp(fcp->c_desc.cd_nameptr, tcnp->cn_nameptr,
-            fcp->c_desc.cd_namelen) == 0)) {
-               VOP_UNLOCK(fvp, 0, p);
-               retval = 0;
-               goto abortop;
+       if ((fcp->c_flags & UF_NODUMP) &&
+           vnode_isreg(fvp) &&
+           (fdvp != tdvp) &&
+           (fdcp->c_desc.cd_nameptr != NULL) &&
+           (strncmp((const char *)fdcp->c_desc.cd_nameptr,
+                    CARBON_TEMP_DIR_NAME,
+                    sizeof(CARBON_TEMP_DIR_NAME)) == 0)) {
+               fcp->c_flags &= ~UF_NODUMP;
+               fcp->c_touch_chgtime = TRUE;
+               (void) hfs_update(fvp, 0);
         }
  
-       /* XXX This doesn't make sense for HFS...
-        * 
-        * Be sure we are not renaming ".", "..", or an alias of ".". This
-        * leads to a crippled directory tree.  It's pretty tough to do a
-        * "ls" or "pwd" with the "." directory entry missing, and "cd .."
-        * doesn't work if the ".." entry is missing.
+       bzero(&from_desc, sizeof(from_desc));
+       from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr;
+       from_desc.cd_namelen = fcnp->cn_namelen;
+       from_desc.cd_parentcnid = fdcp->c_fileid;
+       from_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
+       from_desc.cd_cnid = fcp->c_cnid;
+
+       bzero(&to_desc, sizeof(to_desc));
+       to_desc.cd_nameptr = (const u_int8_t *)tcnp->cn_nameptr;
+       to_desc.cd_namelen = tcnp->cn_namelen;
+       to_desc.cd_parentcnid = tdcp->c_fileid;
+       to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
+       to_desc.cd_cnid = fcp->c_cnid;
+
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto out;
+       }
+       started_tr = 1;
+
+       /* hfs_vnop_link() and hfs_vnop_rename() set kHFSHasChildLinkMask 
+        * inside a journal transaction and without holding a cnode lock.  
+        * As setting of this bit depends on being in journal transaction for 
+        * concurrency, check this bit again after we start journal transaction for rename
+        * to ensure that this directory does not have any descendant that
+        * is a directory hard link. 
          */
-       if (fvp->v_type == VDIR) {
-               if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.')
-                       || fdcp == fcp
-                       || (fcnp->cn_flags&ISDOTDOT)
-                       || (fcp->c_flag & C_RENAME)) {
-                       VOP_UNLOCK(fvp, 0, p);
-                       retval = EINVAL;
-                       goto abortop;
+       if (vnode_isdir(fvp) && (fdvp != tdvp)) {
+               if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) {
+                   lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                   if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) {
+                               error = EPERM;
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               goto out;
+                       }
+                       hfs_systemfile_unlock(hfsmp, lockflags);
                 }
-               fcp->c_flag |= C_RENAME;
         }
  
-       /* XXX UFS does vrele(fdvp) here */
-
-       /* From now on use bad instead of abort to exit */
+       // if it's a hardlink then re-lookup the name so
+       // that we get the correct cnid in from_desc (see
+       // the comment in hfs_removefile for more details)
+       //
+       if (fcp->c_flag & C_HARDLINK) {
+           struct cat_desc tmpdesc;
+           cnid_t real_cnid;
+
+           tmpdesc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr;
+           tmpdesc.cd_namelen = fcnp->cn_namelen;
+           tmpdesc.cd_parentcnid = fdcp->c_fileid;
+           tmpdesc.cd_hint = fdcp->c_childhint;
+           tmpdesc.cd_flags = fcp->c_desc.cd_flags & CD_ISDIR;
+           tmpdesc.cd_encoding = 0;
+           
+           lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+           if (cat_lookup(hfsmp, &tmpdesc, 0, NULL, NULL, NULL, &real_cnid) != 0) {
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               goto out;
+           }
  
-       tdcp = VTOC(tdvp);
-       if (tvp)
-               tcp = VTOC(tvp);
+           // use the real cnid instead of whatever happened to be there
+           from_desc.cd_cnid = real_cnid;
+           hfs_systemfile_unlock(hfsmp, lockflags);
+       }
  
-       newparent = tdcp->c_cnid;
-       
-       retval = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
-       if ((fvp->v_type == VDIR) && (newparent != oldparent)) {
-               if (retval)             /* write access check above */
-                       goto bad;
+       /*
+        * Reserve some space in the Catalog file.
+        */
+       if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) {
+               goto out;
         }
-       retval = 0;  /* Reset value from above, we dont care about it anymore */
-       
+       got_cookie = 1;
+
         /*
-        * If the destination exists, then be sure its type (file or dir)
-        * matches that of the source.  And, if it is a directory make sure
-        * it is empty.  Then delete the destination.
+        * If the destination exists then it may need to be removed.
          */
         if (tvp) {
                 /*
-                * If the parent directory is "sticky", then the user must
-                * own the parent directory, or the destination of the rename,
-                * otherwise the destination may not be changed (except by
-                * root). This implements append-only directories.
-                */
-               if ((tdcp->c_mode & S_ISTXT) && (tcnp->cn_cred->cr_uid != 0) &&
-                   tcnp->cn_cred->cr_uid != tdcp->c_uid &&
-                   tcnp->cn_cred->cr_uid != tcp->c_uid) {
-                       retval = EPERM;
-                       goto bad;
-               }
-
-               /*
-                * Target must be empty if a directory.
-                */
-               if (S_ISDIR(tcp->c_mode) && (tcp->c_nlink > 2)) {
-                               retval = ENOTEMPTY;
-                               goto bad;
-               }
-
-               /*
-                * VOP_REMOVE will vput tdvp so we better bump 
-                * its ref count and relockit, always set tvp to
-                * NULL afterwards to indicate that were done with it.
+                * When fvp matches tvp they could be case variants
+                * or matching hard links.
                  */
-               VREF(tdvp);
+               if (fvp == tvp) {
+                       if (!(fcp->c_flag & C_HARDLINK)) {
+                               goto skip_rm;  /* simple case variant */
  
-               cache_purge(tvp);
-            
-               tcnp->cn_flags &= ~SAVENAME;
+                       } else if ((fdvp != tdvp) ||
+                                  (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) {
+                               goto out;  /* matching hardlinks, nothing to do */
  
-               if (tvp->v_type == VDIR)
-                       retval = VOP_RMDIR(tdvp, tvp, tcnp);
-               else
-                       retval = VOP_REMOVE(tdvp, tvp, tcnp);
+                       } else if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen,
+                                              (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) {
+                               goto skip_rm;  /* case-variant hardlink in the same dir */
+                       } else {
+                               goto out;  /* matching hardlink, nothing to do */
+                       }
+               }
  
-               (void) vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p);
-               tvp = NULL;
-               tcp = NULL;             
-               if (retval)
-                       goto bad;
+               if (vnode_isdir(tvp))
+                       error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE);
+               else {
+                       if (tcp){
+                               rvp = tcp->c_rsrc_vp;
+                       }
+                       error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0);
+                               
+                       /* If the destination file had a resource fork vnode, we couldn't do 
+                        * anything about it in hfs_removefile because we didn't have a reference on it.  
+                        * We need to take action here to prevent it from leaking blocks.  If removefile 
+                        * succeeded, then squirrel away the vid of the resource fork vnode and force a 
+                        * recycle after dropping all of the locks. The vid is guaranteed not to change 
+                        * at this point because we still hold the cnode lock.
+                        */
+                       if ((error == 0) && (tcp->c_flag & C_DELETED) && rvp && !vnode_isinuse(rvp, 0)) {
+                               rsrc_vid = vnode_vid(rvp);      
+                               recycle_rsrc = 1;
+                       }
+               }
  
+               if (error)
+                       goto out;
+               tvp_deleted = 1;
         }
-
-       /* XXX
-        * Prevent lock heirarchy violation (deadlock):
-        *
-        * If fdvp is the parent of tdvp then we must drop
-        * tdvp lock before aquiring the lock for fdvp.
+skip_rm:
+       /*
+        * All done with tvp and fvp
          */
-       if (newparent != oldparent)
-               vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p);
  
-       /* remove the existing entry from the namei cache: */
-       cache_purge(fvp);
-
-       hfsmp = VTOHFS(fvp);
-       bzero(&from_desc, sizeof(from_desc));
-       from_desc.cd_nameptr = fcnp->cn_nameptr;
-       from_desc.cd_namelen = fcnp->cn_namelen;
-       from_desc.cd_parentcnid = fdcp->c_cnid;
-       from_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
-       from_desc.cd_cnid = fcp->c_cnid;
-       bzero(&to_desc, sizeof(to_desc));
-       to_desc.cd_nameptr = tcnp->cn_nameptr;
-       to_desc.cd_namelen = tcnp->cn_namelen;
-       to_desc.cd_parentcnid = tdcp->c_cnid;
-       to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED);
-       to_desc.cd_cnid = fcp->c_cnid;
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+       error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc);
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
-       /* Lock catalog b-tree */
-       retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);
-       if (retval) {
-               if (newparent != oldparent)  /* unlock the lock we just got */
-                       VOP_UNLOCK(fdvp, 0, p);
-                goto bad;
-       }
-       retval = cat_rename(hfsmp, &from_desc, &tdcp->c_desc,
-                       &to_desc, &out_desc);
+       if (error) {
+               goto out;
+       }
  
-       /* Unlock catalog b-tree */
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+       /* Invalidate negative cache entries in the destination directory */
+       if (tdcp->c_flag & C_NEG_ENTRIES) {
+               cache_purge_negatives(tdvp);
+               tdcp->c_flag &= ~C_NEG_ENTRIES;
+       }
  
-       if (newparent != oldparent)
-               VOP_UNLOCK(fdvp, 0, p);
+       /* Update cnode's catalog descriptor */
+       replace_desc(fcp, &out_desc);
+       fcp->c_parentcnid = tdcp->c_fileid;
+       fcp->c_hint = 0;
+
+       hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE,
+                     (fdcp->c_cnid == kHFSRootFolderID));
+       hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_MKDIR : VOL_MKFILE,
+                     (tdcp->c_cnid == kHFSRootFolderID));
+
+       /* Update both parent directories. */
+       if (fdvp != tdvp) {
+               if (vnode_isdir(fvp)) {
+                       /* If the source directory has directory hard link 
+                        * descendants, set the kHFSHasChildLinkBit in the 
+                        * destination parent hierarchy 
+                        */
+                       if ((fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) && 
+                           !(tdcp->c_attr.ca_recflags & kHFSHasChildLinkMask)) {
  
-       if (retval)  goto bad;
+                               tdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask;
  
-       /* update cnode's catalog descriptor */
-       replace_desc(fcp, &out_desc);
+                               error = cat_set_childlinkbit(hfsmp, tdcp->c_parentcnid);
+                               if (error) {
+                                       printf ("hfs_vnop_rename: error updating parent chain for %u\n", tdcp->c_cnid);
+                                       error = 0;
+                               }
+                       }
+                       INC_FOLDERCOUNT(hfsmp, tdcp->c_attr);
+                       DEC_FOLDERCOUNT(hfsmp, fdcp->c_attr);
+               }
+               tdcp->c_entries++;
+               tdcp->c_dirchangecnt++;
+               if (fdcp->c_entries > 0)
+                       fdcp->c_entries--;
+               fdcp->c_dirchangecnt++;
+               fdcp->c_touch_chgtime = TRUE;
+               fdcp->c_touch_modtime = TRUE;
+
+               fdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
+               (void) hfs_update(fdvp, 0);
+       }
+       tdcp->c_childhint = out_desc.cd_hint;   /* Cache directory's location */
+       tdcp->c_touch_chgtime = TRUE;
+       tdcp->c_touch_modtime = TRUE;
  
-       fcp->c_flag &= ~C_RENAME;
+       tdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
+       (void) hfs_update(tdvp, 0);
+out:
+       if (got_cookie) {
+               cat_postflight(hfsmp, &cookie, p);
+       }
+       if (started_tr) {
+           hfs_end_transaction(hfsmp);
+       }
  
-       /*
-        * Time stamp both parent directories.
-        * Note that if this is a rename within the same directory,
-        * (where tdcp == fdcp)
-        * the code below is still safe and correct.
+       /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have
+          generated a NOTE_WRITE for tdvp and a NOTE_DELETE for tvp.
          */
-       if (fdcp->c_nlink > 0)
-               fdcp->c_nlink--;
-       if (fdcp->c_entries > 0)
-               fdcp->c_entries--;
-       tdcp->c_nlink++;
-       tdcp->c_entries++;
-       fdcp->c_flag |= C_UPDATE;
-       tdcp->c_flag |= C_UPDATE;
-       tv = time;
-       CTIMES(fdcp, &tv, &tv);
-       CTIMES(tdcp, &tv, &tv);
-       tdcp->c_childhint = out_desc.cd_hint;   /* Cache directory's location */
+       if (error == 0) {
+               HFS_KNOTE(fvp, NOTE_RENAME);
+               HFS_KNOTE(fdvp, NOTE_WRITE);
+               if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE);
+       };
  
-       hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_RMDIR : VOL_RMFILE,
-               (fdcp->c_cnid == kHFSRootFolderID));
-       hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_MKDIR : VOL_MKFILE,
-               (tdcp->c_cnid == kHFSRootFolderID));
+       fdcp->c_flag &= ~C_DIR_MODIFICATION;
+       wakeup((caddr_t)&fdcp->c_flag);
+       if (fdvp != tdvp) {
+           tdcp->c_flag &= ~C_DIR_MODIFICATION;
+           wakeup((caddr_t)&tdcp->c_flag);
+       }
  
-       vput(tdvp);
-       vrele(fdvp);
-       vput(fvp);
-       return (0);
+       if (took_trunc_lock)
+               hfs_unlock_truncate(VTOC(tvp), TRUE);   
  
-bad:
-       if (fcp)
-               fcp->c_flag &= ~C_RENAME;
-       if (tdvp == tvp)
-               vrele(tdvp);
-       else
-               vput(tdvp);
-       if (tvp)
-               vput(tvp);
-       vrele(fdvp);
+       hfs_unlockfour(fdcp, fcp, tdcp, tcp);
  
-       if (VOP_ISLOCKED(fvp))
-               vput(fvp);
-       else
-               vrele(fvp);
-       return (retval);
+       /* Now that we've dropped locks, see if we need to force recycle on the old
+        * destination's rsrc fork, preventing a leak of the rsrc fork's blocks.  Note that
+        * doing the ref/rele is in order to twiddle the VL_INACTIVE bit to the vnode's flags
+        * so that on the last vnode_put for this vnode, we will force vnop_inactive to be triggered.
+        */
+       if ((recycle_rsrc) && (vnode_getwithvid(rvp, rsrc_vid) == 0)) {         
+               vnode_ref(rvp);
+               vnode_rele(rvp);
+               vnode_recycle(rvp);
+               vnode_put (rvp);
+       }
  
-abortop:
  
-       VOP_ABORTOP(tdvp, tcnp);
-       if (tdvp == tvp)
-               vrele(tdvp);
-       else
-               vput(tdvp);
-       if (tvp)
-               vput(tvp);
-       VOP_ABORTOP(fdvp, fcnp);
-       vrele(fdvp);
-       vrele(fvp);
-       return (retval);
-}
+       /* After tvp is removed the only acceptable error is EIO */
+       if (error && tvp_deleted)
+               error = EIO;
  
+       return (error);
+}
  
  
  /*
- * Mkdir system call
-#% mkdir       dvp     L U U
-#% mkdir       vpp     - L -
-#
- vop_mkdir {
-     IN WILLRELE struct vnode *dvp;
-     OUT struct vnode **vpp;
-     IN struct componentname *cnp;
-     IN struct vattr *vap;
-
-     We are responsible for freeing the namei buffer,
-        it is done in hfs_makenode()
-*/
-
+ * Make a directory.
+ */
  static int
-hfs_mkdir(ap)
-       struct vop_mkdir_args /* {
-               struct vnode *a_dvp;
-               struct vnode **a_vpp;
-               struct componentname *a_cnp;
-               struct vattr *a_vap;
-       } */ *ap;
+hfs_vnop_mkdir(struct vnop_mkdir_args *ap)
  {
-       struct vattr *vap = ap->a_vap;
-
-       return (hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode),
-                            ap->a_dvp, ap->a_vpp, ap->a_cnp));
+       /***** HACK ALERT ********/
+       ap->a_cnp->cn_flags |= MAKEENTRY;
+       return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context);
  }
  
  
  /*
- * symlink -- make a symbolic link
-#% symlink     dvp     L U U
-#% symlink     vpp     - U -
-#
-# XXX - note that the return vnode has already been VRELE'ed
-#      by the filesystem layer.  To use it you must use vget,
-#      possibly with a further namei.
-#
- vop_symlink {
-     IN WILLRELE struct vnode *dvp;
-     OUT WILLRELE struct vnode **vpp;
-     IN struct componentname *cnp;
-     IN struct vattr *vap;
-     IN char *target;
-
-     We are responsible for freeing the namei buffer, 
-        it is done in hfs_makenode().
-
-*/
-
+ * Create a symbolic link.
+ */
  static int
-hfs_symlink(ap)
-       struct vop_symlink_args /* {
-               struct vnode *a_dvp;
-               struct vnode **a_vpp;
-               struct componentname *a_cnp;
-               struct vattr *a_vap;
-               char *a_target;
-       } */ *ap;
+hfs_vnop_symlink(struct vnop_symlink_args *ap)
  {
-       register struct vnode *vp, **vpp = ap->a_vpp;
+       struct vnode **vpp = ap->a_vpp;
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp = NULL;
+       struct cnode *cp = NULL;
+       struct hfsmount *hfsmp;
         struct filefork *fp;
-       int len, error;
         struct buf *bp = NULL;
+       char *datap;
+       int started_tr = 0;
+       u_int32_t len;
+       int error;
  
         /* HFS standard disks don't support symbolic links */
-       if (VTOVCB(ap->a_dvp)->vcbSigWord != kHFSPlusSigWord) {
-               VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
-               vput(ap->a_dvp);
-               return (EOPNOTSUPP);
-       }
+       if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord)
+               return (ENOTSUP);
  
         /* Check for empty target name */
-       if (ap->a_target[0] == 0) {
-               VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
-               vput(ap->a_dvp);
+       if (ap->a_target[0] == 0)
                 return (EINVAL);
+
+       hfsmp = VTOHFS(dvp);
+       len = strlen(ap->a_target);
+
+       /* Check for free space */
+       if (((u_int64_t)hfs_freeblks(hfsmp, 0) * (u_int64_t)hfsmp->blockSize) < len) {
+               return (ENOSPC);
         }
  
         /* Create the vnode */
-       if ((error = hfs_makenode(S_IFLNK | ap->a_vap->va_mode,
-           ap->a_dvp, vpp, ap->a_cnp)))
-               return (error);
-
+       ap->a_vap->va_mode |= S_IFLNK;
+       if ((error = hfs_makenode(dvp, vpp, ap->a_cnp, ap->a_vap, ap->a_context))) {
+               goto out;
+       }
         vp = *vpp;
-       len = strlen(ap->a_target);
+       if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
+               goto out;
+       }
+       cp = VTOC(vp);
         fp = VTOF(vp);
-       fp->ff_clumpsize = VTOVCB(vp)->blockSize;
  
-       /* Allocate space for the link */
-       error = VOP_TRUNCATE(vp, len, IO_NOZEROFILL,
-                             ap->a_cnp->cn_cred, ap->a_cnp->cn_proc);
-       if (error)
-               goto out;       /* XXX need to remove link */
+       if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
+           goto out;
+       }
+
+#if QUOTA
+       (void)hfs_getinoquota(cp);
+#endif /* QUOTA */
+
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto out;
+       }
+       started_tr = 1;
+
+       /*
+        * Allocate space for the link.
+        *
+        * Since we're already inside a transaction,
+        * tell hfs_truncate to skip the ubc_setsize.
+        *
+        * Don't need truncate lock since a symlink is treated as a system file.
+        */
+       error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, ap->a_context);
+
+       /* On errors, remove the symlink file */
+       if (error) {
+               /*
+                * End the transaction so we don't re-take the cnode lock
+                * below while inside a transaction (lock order violation).
+                */
+               hfs_end_transaction(hfsmp);
+
+               /* hfs_removefile() requires holding the truncate lock */
+               hfs_unlock(cp);
+               hfs_lock_truncate(cp, TRUE);
+               hfs_lock(cp, HFS_FORCE_LOCK);
+
+               if (hfs_start_transaction(hfsmp) != 0) {
+                       started_tr = 0;
+                       hfs_unlock_truncate(cp, TRUE);
+                       goto out;
+               }
+               
+               (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0);
+               hfs_unlock_truncate(cp, TRUE);
+               goto out;       
+       }
  
         /* Write the link to disk */
-       bp = getblk(vp, 0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size),
+       bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size),
                         0, 0, BLK_META);
-       bzero(bp->b_data, bp->b_bufsize);
-       bcopy(ap->a_target, bp->b_data, len);
-       bawrite(bp);
+       if (hfsmp->jnl) {
+               journal_modify_block_start(hfsmp->jnl, bp);
+       }
+       datap = (char *)buf_dataptr(bp);
+       bzero(datap, buf_size(bp));
+       bcopy(ap->a_target, datap, len);
+
+       if (hfsmp->jnl) {
+               journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
+       } else {
+               buf_bawrite(bp);
+       }
+       /*
+        * We deferred the ubc_setsize for hfs_truncate
+        * since we were inside a transaction.
+        *
+        * We don't need to drop the cnode lock here
+        * since this is a symlink.
+        */
+       ubc_setsize(vp, len);
  out:
-       vput(vp);
+       if (started_tr)
+           hfs_end_transaction(hfsmp);
+       if ((cp != NULL) && (vp != NULL)) {
+               hfs_unlock(cp);
+       }
+       if (error) {
+               if (vp) {
+                       vnode_put(vp);
+               }
+               *vpp = NULL;
+       }
         return (error);
  }
  
  
-/*
- * Dummy dirents to simulate the "." and ".." entries of the directory
- * in a hfs filesystem.  HFS doesn't provide these on disk.  Note that
- * the size of these entries is the smallest needed to represent them
- * (only 12 byte each).
- */
-static hfsdotentry  rootdots[2] = {
-       {
-               1,                              /* d_fileno */
-               sizeof(struct hfsdotentry),     /* d_reclen */
-               DT_DIR,                         /* d_type */
-               1,                              /* d_namlen */
-               "."                             /* d_name */
-    },
-    {
-               1,                              /* d_fileno */
-               sizeof(struct hfsdotentry),     /* d_reclen */
-               DT_DIR,                         /* d_type */
-               2,                              /* d_namlen */
-               ".."                            /* d_name */
-       }
+/* structures to hold a "." or ".." directory entry */
+struct hfs_stddotentry {
+       u_int32_t       d_fileno;   /* unique file number */
+       u_int16_t       d_reclen;   /* length of this structure */
+       u_int8_t        d_type;     /* dirent file type */
+       u_int8_t        d_namlen;   /* len of filename */
+       char            d_name[4];  /* "." or ".." */
  };
  
-/*     4.3 Note:
-*      There is some confusion as to what the semantics of uio_offset are.
-*      In ufs, it represents the actual byte offset within the directory
-*      "file."  HFS, however, just uses it as an entry counter - essentially
-*      assuming that it has no meaning except to the hfs_readdir function.
-*      This approach would be more efficient here, but some callers may
-*      assume the uio_offset acts like a byte offset.  NFS in fact
-*      monkeys around with the offset field a lot between readdir calls.
-*
-*      The use of the resid uiop->uio_resid and uiop->uio_iov->iov_len
-*      fields is a mess as well.  The libc function readdir() returns
-*      NULL (indicating the end of a directory) when either
-*      the getdirentries() syscall (which calls this and returns
-*      the size of the buffer passed in less the value of uiop->uio_resid)
-*      returns 0, or a direct record with a d_reclen of zero.
-*      nfs_server.c:rfs_readdir(), on the other hand, checks for the end
-*      of the directory by testing uiop->uio_resid == 0.  The solution
-*      is to pad the size of the last struct direct in a given
-*      block to fill the block if we are not at the end of the directory.
-*/
+struct hfs_extdotentry {
+       u_int64_t  d_fileno;   /* unique file number */
+       u_int64_t  d_seekoff;  /* seek offset (optional, used by servers) */
+       u_int16_t  d_reclen;   /* length of this structure */
+       u_int16_t  d_namlen;   /* len of filename */
+       u_int8_t   d_type;     /* dirent file type */
+       u_char     d_name[3];  /* "." or ".." */
+};
  
+typedef union {
+       struct hfs_stddotentry  std;
+       struct hfs_extdotentry  ext;
+} hfs_dotentry_t;
  
  /*
- * NOTE: We require a minimal buffer size of DIRBLKSIZ for two reasons. One, it is the same value
- * returned be stat() call as the block size. This is mentioned in the man page for getdirentries():
- * "Nbytes must be greater than or equal to the block size associated with the file,
- * see stat(2)". Might as well settle on the same size of ufs. Second, this makes sure there is enough
- * room for the . and .. entries that have to added manually.
+ *  hfs_vnop_readdir reads directory entries into the buffer pointed
+ *  to by uio, in a filesystem independent format.  Up to uio_resid
+ *  bytes of data can be transferred.  The data in the buffer is a
+ *  series of packed dirent structures where each one contains the
+ *  following entries:
+ *
+ *     u_int32_t   d_fileno;              // file number of entry
+ *     u_int16_t   d_reclen;              // length of this record
+ *     u_int8_t    d_type;                // file type
+ *     u_int8_t    d_namlen;              // length of string in d_name
+ *     char        d_name[MAXNAMELEN+1];  // null terminated file name
+ *
+ *  The current position (uio_offset) refers to the next block of
+ *  entries.  The offset can only be set to a value previously
+ *  returned by hfs_vnop_readdir or zero.  This offset does not have
+ *  to match the number of bytes returned (in uio_resid).
+ *
+ *  In fact, the offset used by HFS is essentially an index (26 bits)
+ *  with a tag (6 bits).  The tag is for associating the next request
+ *  with the current request.  This enables us to have multiple threads
+ *  reading the directory while the directory is also being modified.
+ *
+ *  Each tag/index pair is tied to a unique directory hint.  The hint
+ *  contains information (filename) needed to build the catalog b-tree
+ *  key for finding the next set of entries.
+ *
+ * If the directory is marked as deleted-but-in-use (cp->c_flag & C_DELETED),
+ * do NOT synthesize entries for "." and "..".
   */
-
-/*                     
-#% readdir     vp      L L L
-#
-vop_readdir {
-    IN struct vnode *vp;
-    INOUT struct uio *uio;
-    IN struct ucred *cred;
-    INOUT int *eofflag;
-    OUT int *ncookies;
-    INOUT u_long **cookies;
-    */
  static int
-hfs_readdir(ap)
-       struct vop_readdir_args /* {
-               struct vnode *vp;
-               struct uio *uio;
-               struct ucred *cred;
-               int *eofflag;
-               int *ncookies;
-               u_long **cookies;
+hfs_vnop_readdir(ap)
+       struct vnop_readdir_args /* {
+               vnode_t a_vp;
+               uio_t a_uio;
+               int a_flags;
+               int *a_eofflag;
+               int *a_numdirent;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       register struct uio *uio = ap->a_uio;
-       struct cnode *cp = VTOC(ap->a_vp);
-       struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
-       struct proc *p = current_proc();
-       off_t off = uio->uio_offset;
-       int retval = 0;
+       struct vnode *vp = ap->a_vp;
+       uio_t uio = ap->a_uio;
+       struct cnode *cp;
+       struct hfsmount *hfsmp;
+       directoryhint_t *dirhint = NULL;
+       directoryhint_t localhint;
+       off_t offset;
+       off_t startoffset;
+       int error = 0;
         int eofflag = 0;
+       user_addr_t user_start = 0;
+       user_size_t user_len = 0;
+       int index;
+       unsigned int tag;
+       int items;
+       int lockflags;
+       int extended;
+       int nfs_cookies;
+       caddr_t bufstart;
+       cnid_t cnid_hint = 0;
+
+       items = 0;
+       startoffset = offset = uio_offset(uio);
+       bufstart = CAST_DOWN(caddr_t, uio_iov_base(uio));
+       extended = (ap->a_flags & VNODE_READDIR_EXTENDED);
+       nfs_cookies = extended && (ap->a_flags & VNODE_READDIR_REQSEEKOFF);
+
+       /* Sanity check the uio data. */
+       if ((uio_iovcnt(uio) > 1) ||
+           (uio_resid(uio) < (int)sizeof(struct dirent))) {
+               return (EINVAL);
+       }
+       /* Note that the dirhint calls require an exclusive lock. */
+       if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
+               return (error);
+       cp = VTOC(vp);
+       hfsmp = VTOHFS(vp);
  
-       /* We assume it's all one big buffer... */
-       if (uio->uio_iovcnt > 1 || uio->uio_resid < AVERAGE_HFSDIRENTRY_SIZE)
-               return EINVAL;
-
-       /* Create the entries for . and .. */
-       if (uio->uio_offset < sizeof(rootdots)) {
-               caddr_t dep;
-               size_t dotsize;
-               
-               rootdots[0].d_fileno = cp->c_cnid;
-               rootdots[1].d_fileno = cp->c_parentcnid;
-
-               if (uio->uio_offset == 0) {
-                       dep = (caddr_t) &rootdots[0];
-                       dotsize = 2* sizeof(struct hfsdotentry);
-               } else if (uio->uio_offset == sizeof(struct hfsdotentry)) {
-                       dep = (caddr_t) &rootdots[1];
-                       dotsize = sizeof(struct hfsdotentry);
+       /* Pick up cnid hint (if any). */
+       if (nfs_cookies) {
+               cnid_hint = (cnid_t)(uio_offset(uio) >> 32);
+               uio_setoffset(uio, uio_offset(uio) & 0x00000000ffffffffLL);
+               if (cnid_hint == INT_MAX) { /* searching past the last item */
+                       eofflag = 1;
+                       goto out;
+               }
+       }
+       /*
+        * Synthesize entries for "." and "..", unless the directory has
+        * been deleted, but not closed yet (lazy delete in progress).
+        */
+       if (offset == 0 && !(cp->c_flag & C_DELETED)) {
+               hfs_dotentry_t  dotentry[2];
+               size_t  uiosize;
+
+               if (extended) {
+                       struct hfs_extdotentry *entry = &dotentry[0].ext;
+
+                       entry->d_fileno = cp->c_cnid;
+                       entry->d_reclen = sizeof(struct hfs_extdotentry);
+                       entry->d_type = DT_DIR;
+                       entry->d_namlen = 1;
+                       entry->d_name[0] = '.';
+                       entry->d_name[1] = '\0';
+                       entry->d_name[2] = '\0';
+                       entry->d_seekoff = 1;
+
+                       ++entry;
+                       entry->d_fileno = cp->c_parentcnid;
+                       entry->d_reclen = sizeof(struct hfs_extdotentry);
+                       entry->d_type = DT_DIR;
+                       entry->d_namlen = 2;
+                       entry->d_name[0] = '.';
+                       entry->d_name[1] = '.';
+                       entry->d_name[2] = '\0';
+                       entry->d_seekoff = 2;
+                       uiosize = 2 * sizeof(struct hfs_extdotentry);
                 } else {
-                       retval = EINVAL;
-                       goto Exit;
+                       struct hfs_stddotentry *entry = &dotentry[0].std;
+
+                       entry->d_fileno = cp->c_cnid;
+                       entry->d_reclen = sizeof(struct hfs_stddotentry);
+                       entry->d_type = DT_DIR;
+                       entry->d_namlen = 1;
+                       *(int *)&entry->d_name[0] = 0;
+                       entry->d_name[0] = '.';
+
+                       ++entry;
+                       entry->d_fileno = cp->c_parentcnid;
+                       entry->d_reclen = sizeof(struct hfs_stddotentry);
+                       entry->d_type = DT_DIR;
+                       entry->d_namlen = 2;
+                       *(int *)&entry->d_name[0] = 0;
+                       entry->d_name[0] = '.';
+                       entry->d_name[1] = '.';
+                       uiosize = 2 * sizeof(struct hfs_stddotentry);
                 }
-
-               retval = uiomove(dep, dotsize, uio);
-               if (retval != 0)
-                       goto Exit;
+               if ((error = uiomove((caddr_t)&dotentry, uiosize, uio))) {
+                       goto out;
+               }
+               offset += 2;
         }
  
-       /* If there are no children then we're done */  
+       /* If there are no real entries then we're done. */
         if (cp->c_entries == 0) {
+               error = 0;
                 eofflag = 1;
-               retval = 0;
-               goto Exit;
+               uio_setoffset(uio, offset);
+               goto seekoffcalc;
         }
  
-       /* Lock catalog b-tree */
-       retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p);
-       if (retval) goto Exit;
-
-       retval = cat_getdirentries(hfsmp, &cp->c_desc, uio, &eofflag);
+       //
+       // We have to lock the user's buffer here so that we won't
+       // fault on it after we've acquired a shared lock on the
+       // catalog file.  The issue is that you can get a 3-way
+       // deadlock if someone else starts a transaction and then
+       // tries to lock the catalog file but can't because we're
+       // here and we can't service our page fault because VM is
+       // blocked trying to start a transaction as a result of
+       // trying to free up pages for our page fault.  It's messy
+       // but it does happen on dual-processors that are paging
+       // heavily (see radar 3082639 for more info).  By locking
+       // the buffer up-front we prevent ourselves from faulting
+       // while holding the shared catalog file lock.
+       //
+       // Fortunately this and hfs_search() are the only two places
+       // currently (10/30/02) that can fault on user data with a
+       // shared lock on the catalog file.
+       //
+       if (hfsmp->jnl && uio_isuserspace(uio)) {
+               user_start = uio_curriovbase(uio);
+               user_len = uio_curriovlen(uio);
+
+               if ((error = vslock(user_start, user_len)) != 0) {
+                       user_start = 0;
+                       goto out;
+               }
+       }
+       /* Convert offset into a catalog directory index. */
+       index = (offset & HFS_INDEX_MASK) - 2;
+       tag = offset & ~HFS_INDEX_MASK;
+
+       /* Lock catalog during cat_findname and cat_getdirentries. */
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+       /* When called from NFS, try and resolve a cnid hint. */
+       if (nfs_cookies && cnid_hint != 0) {
+               if (cat_findname(hfsmp, cnid_hint, &localhint.dh_desc) == 0) {
+                       if ( localhint.dh_desc.cd_parentcnid == cp->c_fileid) {
+                               localhint.dh_index = index - 1;
+                               localhint.dh_time = 0;
+                               bzero(&localhint.dh_link, sizeof(localhint.dh_link));
+                               dirhint = &localhint;  /* don't forget to release the descriptor */
+                       } else {
+                               cat_releasedesc(&localhint.dh_desc);
+                       }
+               }
+       }
  
-       /* Unlock catalog b-tree */
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+       /* Get a directory hint (cnode must be locked exclusive) */
+       if (dirhint == NULL) {
+               dirhint = hfs_getdirhint(cp, ((index - 1) & HFS_INDEX_MASK) | tag, 0);
  
-       if (retval != E_NONE) {
-               goto Exit;
+               /* Hide tag from catalog layer. */
+               dirhint->dh_index &= HFS_INDEX_MASK;
+               if (dirhint->dh_index == HFS_INDEX_MASK) {
+                       dirhint->dh_index = -1;
+               }
         }
         
-       /* were we already past eof ? */
-       if (uio->uio_offset == off) {
-               retval = E_NONE;
-               goto Exit;
+       if (index == 0) {
+               dirhint->dh_threadhint = cp->c_dirthreadhint;
+       }
+
+       /* Pack the buffer with dirent entries. */
+       error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag);
+
+       if (index == 0 && error == 0) {
+               cp->c_dirthreadhint = dirhint->dh_threadhint;
+       }
+
+       hfs_systemfile_unlock(hfsmp, lockflags);
+
+       if (error != 0) {
+               goto out;
         }
         
-       cp->c_flag |= C_ACCESS;
-                                                                                                                       /* Bake any cookies */
-       if (!retval && ap->a_ncookies != NULL) {
-               struct dirent* dpStart;
-               struct dirent* dpEnd;
-               struct dirent* dp;
-               int ncookies;
-               u_long *cookies;
-               u_long *cookiep;
+       /* Get index to the next item */
+       index += items;
+       
+       if (items >= (int)cp->c_entries) {
+               eofflag = 1;
+       }
  
-               /*
-                * Only the NFS server uses cookies, and it loads the
-                * directory block into system space, so we can just look at
-                * it directly.
-                */
-               if (uio->uio_segflg != UIO_SYSSPACE)
-                       panic("hfs_readdir: unexpected uio from NFS server");
-               dpStart = (struct dirent *)(uio->uio_iov->iov_base - (uio->uio_offset - off));
-               dpEnd = (struct dirent *) uio->uio_iov->iov_base;
-               for (dp = dpStart, ncookies = 0;
-                    dp < dpEnd && dp->d_reclen != 0;
-                    dp = (struct dirent *)((caddr_t)dp + dp->d_reclen))
-                       ncookies++;
-               MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
-               for (dp = dpStart, cookiep = cookies;
-                    dp < dpEnd;
-                    dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
-                       off += dp->d_reclen;
-                       *cookiep++ = (u_long) off;
-               }
-               *ap->a_ncookies = ncookies;
-               *ap->a_cookies = cookies;
-       }
-
-Exit:;
-       if (ap->a_eofflag)
-               *ap->a_eofflag = eofflag;
+       /* Convert catalog directory index back into an offset. */
+       while (tag == 0)
+               tag = (++cp->c_dirhinttag) << HFS_INDEX_BITS;   
+       uio_setoffset(uio, (index + 2) | tag);
+       dirhint->dh_index |= tag;
+
+seekoffcalc:
+       cp->c_touch_acctime = TRUE;
  
-    return (retval);
+       if (ap->a_numdirent) {
+               if (startoffset == 0)
+                       items += 2;
+               *ap->a_numdirent = items;
+       }
+
+out:
+       if (hfsmp->jnl && user_start) {
+               vsunlock(user_start, user_len, TRUE);
+       }
+       /* If we didn't do anything then go ahead and dump the hint. */
+       if ((dirhint != NULL) &&
+           (dirhint != &localhint) &&
+           (uio_offset(uio) == startoffset)) {
+               hfs_reldirhint(cp, dirhint);
+               eofflag = 1;
+       }
+       if (ap->a_eofflag) {
+               *ap->a_eofflag = eofflag;
+       }
+       if (dirhint == &localhint) {
+               cat_releasedesc(&localhint.dh_desc);
+       }
+       hfs_unlock(cp);
+       return (error);
  }
  
  
  /*
- * Return target name of a symbolic link
-#% readlink    vp      L L L
-#
- vop_readlink {
-     IN struct vnode *vp;
-     INOUT struct uio *uio;
-     IN struct ucred *cred;
-     */
-
+ * Read contents of a symbolic link.
+ */
  static int
-hfs_readlink(ap)
-       struct vop_readlink_args /* {
+hfs_vnop_readlink(ap)
+       struct vnop_readlink_args /* {
                 struct vnode *a_vp;
                 struct uio *a_uio;
-               struct ucred *a_cred;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       int retval;
         struct vnode *vp = ap->a_vp;
         struct cnode *cp;
         struct filefork *fp;
+       int error;
  
-       if (vp->v_type != VLNK)
+       if (!vnode_islnk(vp))
                 return (EINVAL);
   
+       if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
+               return (error);
         cp = VTOC(vp);
         fp = VTOF(vp);
     
         /* Zero length sym links are not allowed */
         if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) {
-               VTOVCB(vp)->vcbFlags |= kHFS_DamagedVolume;
-               return (EINVAL);
+               printf("hfs: zero length symlink on fileid %d\n", cp->c_fileid);
+               error = EINVAL;
+               goto exit;
         }
      
         /* Cache the path so we don't waste buffer cache resources */
@@ -2344,406 +3184,154 @@ hfs_readlink(ap)
                 struct buf *bp = NULL;
  
                 MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK);
-               retval = meta_bread(vp, 0,
-                               roundup((int)fp->ff_size,
-                                       VTOHFS(vp)->hfs_phys_block_size),
-                                               ap->a_cred, &bp);
-               if (retval) {
+               error = (int)buf_meta_bread(vp, (daddr64_t)0,
+                                           roundup((int)fp->ff_size,
+                                           VTOHFS(vp)->hfs_phys_block_size),
+                                           vfs_context_ucred(ap->a_context), &bp);
+               if (error) {
                         if (bp)
-                               brelse(bp);
+                               buf_brelse(bp);
                         if (fp->ff_symlinkptr) {
                                 FREE(fp->ff_symlinkptr, M_TEMP);
                                 fp->ff_symlinkptr = NULL;
                         }
-                       return (retval);
+                       goto exit;
                 }
-               bcopy(bp->b_data, fp->ff_symlinkptr, (size_t)fp->ff_size);
-               if (bp) {
-                       bp->b_flags |= B_INVAL;         /* data no longer needed */
-                       brelse(bp);
+               bcopy((char *)buf_dataptr(bp), fp->ff_symlinkptr, (size_t)fp->ff_size);
+
+               if (VTOHFS(vp)->jnl && (buf_flags(bp) & B_LOCKED) == 0) {
+                       buf_markinvalid(bp);            /* data no longer needed */
                 }
+               buf_brelse(bp);
         }
-       retval = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio);
-
-       return (retval);
-}
-
-
-/*
- * hfs abort op, called after namei() when a CREATE/DELETE isn't actually
- * done. If a buffer has been saved in anticipation of a CREATE, delete it.
-#% abortop     dvp     = = =
-#
- vop_abortop {
-     IN struct vnode *dvp;
-     IN struct componentname *cnp;
-
-     */
-
-/* ARGSUSED */
-
-static int
-hfs_abortop(ap)
-       struct vop_abortop_args /* {
-               struct vnode *a_dvp;
-               struct componentname *a_cnp;
-       } */ *ap;
-{
-       if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
-               FREE_ZONE(ap->a_cnp->cn_pnbuf, ap->a_cnp->cn_pnlen, M_NAMEI);
-
-       return (0);
-}
-
-
-/*
- * Lock an cnode. If its already locked, set the WANT bit and sleep.
-#% lock                vp      U L U
-#
- vop_lock {
-     IN struct vnode *vp;
-     IN int flags;
-     IN struct proc *p;
-     */
+       error = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio);
  
-static int
-hfs_lock(ap)
-       struct vop_lock_args /* {
-               struct vnode *a_vp;
-               int a_flags;
-               struct proc *a_p;
-       } */ *ap;
-{
-       struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
-
-       if (cp == NULL)
-               panic("hfs_lock: cnode in vnode is null\n");
-
-       return (lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p));
-}
-
-/*
- * Unlock an cnode.
-#% unlock      vp      L U L
-#
- vop_unlock {
-     IN struct vnode *vp;
-     IN int flags;
-     IN struct proc *p;
-
-     */
-static int
-hfs_unlock(ap)
-       struct vop_unlock_args /* {
-               struct vnode *a_vp;
-               int a_flags;
-               struct proc *a_p;
-       } */ *ap;
-{
-       struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
-
-       if (cp == NULL)
-               panic("hfs_unlock: cnode in vnode is null\n");
-
-       return (lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE,
-               &vp->v_interlock, ap->a_p));
-}
-
-
-/*
- * Print out the contents of a cnode.
-#% print       vp      = = =
-#
- vop_print {
-     IN struct vnode *vp;
-     */
-static int
-hfs_print(ap)
-       struct vop_print_args /* {
-               struct vnode *a_vp;
-       } */ *ap;
-{
-       struct vnode * vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
+       /*
+        * Keep track of bytes read
+        */
+       if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (error == 0)) {
+               
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+                       VTOF(vp)->ff_bytesread = fp->ff_size;
+               else
+                       VTOF(vp)->ff_bytesread += fp->ff_size;
+               
+       //      if (VTOF(vp)->ff_bytesread > fp->ff_size)
+       //              cp->c_touch_acctime = TRUE;
+       }
  
-       printf("tag VT_HFS, cnid %d, on dev %d, %d", cp->c_cnid,
-               major(cp->c_dev), minor(cp->c_dev));
-#if FIFO
-       if (vp->v_type == VFIFO)
-               fifo_printinfo(vp);
-#endif /* FIFO */
-       lockmgr_printinfo(&cp->c_lock);
-       printf("\n");
-       return (0);
+exit:
+       hfs_unlock(cp);
+       return (error);
  }
  
  
  /*
- * Check for a locked cnode.
-#% islocked    vp      = = =
-#
- vop_islocked {
-     IN struct vnode *vp;
-
-     */
-static int
-hfs_islocked(ap)
-       struct vop_islocked_args /* {
-               struct vnode *a_vp;
-       } */ *ap;
-{
-       return (lockstatus(&VTOC(ap->a_vp)->c_lock));
-}
-
-/*
-
-#% pathconf    vp      L L L
-#
- vop_pathconf {
-     IN struct vnode *vp;
-     IN int name;
-     OUT register_t *retval;
-
-     */
+ * Get configurable pathname variables.
+ */
  static int
-hfs_pathconf(ap)
-       struct vop_pathconf_args /* {
+hfs_vnop_pathconf(ap)
+       struct vnop_pathconf_args /* {
                 struct vnode *a_vp;
                 int a_name;
                 int *a_retval;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       int retval = 0;
-
         switch (ap->a_name) {
         case _PC_LINK_MAX:
-               if (VTOVCB(ap->a_vp)->vcbSigWord == kHFSPlusSigWord)
-                       *ap->a_retval = HFS_LINK_MAX;
-               else
+               if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD)
                         *ap->a_retval = 1;
+               else
+                       *ap->a_retval = HFS_LINK_MAX;
                 break;
         case _PC_NAME_MAX:
-               *ap->a_retval = kHFSPlusMaxFileNameBytes;       /* max # of characters x max utf8 representation */
+               if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD)
+                       *ap->a_retval = kHFSMaxFileNameChars;  /* 31 */
+               else
+                       *ap->a_retval = kHFSPlusMaxFileNameChars;  /* 255 */
                 break;
         case _PC_PATH_MAX:
-               *ap->a_retval = PATH_MAX; /* 1024 */
+               *ap->a_retval = PATH_MAX;  /* 1024 */
+               break;
+       case _PC_PIPE_BUF:
+               *ap->a_retval = PIPE_BUF;
                 break;
         case _PC_CHOWN_RESTRICTED:
-               *ap->a_retval = 1;
+               *ap->a_retval = 200112;         /* _POSIX_CHOWN_RESTRICTED */
                 break;
         case _PC_NO_TRUNC:
-               *ap->a_retval = 0;
+               *ap->a_retval = 200112;         /* _POSIX_NO_TRUNC */
                 break;
         case _PC_NAME_CHARS_MAX:
                 *ap->a_retval = kHFSPlusMaxFileNameChars;
                 break;
         case _PC_CASE_SENSITIVE:
-               *ap->a_retval = 0;
+               if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE)
+                       *ap->a_retval = 1;
+               else
+                       *ap->a_retval = 0;
                 break;
         case _PC_CASE_PRESERVING:
                 *ap->a_retval = 1;
                 break;
-       default:
-               retval = EINVAL;
-       }
-
-       return (retval);
-}
-
-
-/*
- * Advisory record locking support
-#% advlock     vp      U U U
-#
- vop_advlock {
-     IN struct vnode *vp;
-     IN caddr_t id;
-     IN int op;
-     IN struct flock *fl;
-     IN int flags;
-
-     */
-static int
-hfs_advlock(ap)
-       struct vop_advlock_args /* {
-               struct vnode *a_vp;
-               caddr_t  a_id;
-               int a_op;
-               struct flock *a_fl;
-               int a_flags;
-       } */ *ap;
-{
-       struct vnode *vp = ap->a_vp;
-       struct flock *fl = ap->a_fl;
-       struct hfslockf *lock;
-       struct filefork *fork;
-       off_t start, end;
-       int retval;
-
-       /* Only regular files can have locks */
-       if (vp->v_type != VREG)
-               return (EISDIR);
-
-       fork = VTOF(ap->a_vp);
-       /*
-        * Avoid the common case of unlocking when cnode has no locks.
-        */
-       if (fork->ff_lockf == (struct hfslockf *)0) {
-               if (ap->a_op != F_SETLK) {
-                       fl->l_type = F_UNLCK;
-                       return (0);
-               }
-       }
-       /*
-        * Convert the flock structure into a start and end.
-        */
-       start = 0;
-       switch (fl->l_whence) {
-       case SEEK_SET:
-       case SEEK_CUR:
-               /*
-                * Caller is responsible for adding any necessary offset
-                * when SEEK_CUR is used.
-                */
-               start = fl->l_start;
-               break;
-       case SEEK_END:
-               start = fork->ff_size + fl->l_start;
+       case _PC_FILESIZEBITS:
+               *ap->a_retval = 64;     /* number of bits to store max file size */
                 break;
         default:
                 return (EINVAL);
         }
  
-       if (start < 0)
-               return (EINVAL);
-       if (fl->l_len == 0)
-               end = -1;
-       else
-               end = start + fl->l_len - 1;
-
-       /*
-        * Create the hfslockf structure
-        */
-       MALLOC(lock, struct hfslockf *, sizeof *lock, M_LOCKF, M_WAITOK);
-       lock->lf_start = start;
-       lock->lf_end = end;
-       lock->lf_id = ap->a_id;
-       lock->lf_fork = fork;
-       lock->lf_type = fl->l_type;
-       lock->lf_next = (struct hfslockf *)0;
-       TAILQ_INIT(&lock->lf_blkhd);
-       lock->lf_flags = ap->a_flags;
-       /*
-        * Do the requested operation.
-        */
-       switch(ap->a_op) {
-       case F_SETLK:
-               retval = hfs_setlock(lock);
-               break;
-       case F_UNLCK:
-               retval = hfs_clearlock(lock);
-               FREE(lock, M_LOCKF);
-               break;
-       case F_GETLK:
-               retval = hfs_getlock(lock, fl);
-               FREE(lock, M_LOCKF);
-               break;
-       default:
-               retval = EINVAL;
-               _FREE(lock, M_LOCKF);
-            break;
-       }
-
-       return (retval);
+       return (0);
  }
  
  
-
  /*
- * Update the access, modified, and node change times as specified
- * by the C_ACCESS, C_UPDATE, and C_CHANGE flags respectively. The
- * C_MODIFIED flag is used to specify that the node needs to be
- * updated but that the times have already been set. The access and
- * modified times are input parameters but the node change time is
- * always taken from the current time. If waitfor is set, then wait
- * for the disk write of the node to complete.
+ * Update a cnode's on-disk metadata.
+ *
+ * If waitfor is set, then wait for the disk write of
+ * the node to complete.
+ *
+ * The cnode must be locked exclusive
   */
-/*
-#% update      vp      L L L
-       IN struct vnode *vp;
-       IN struct timeval *access;
-       IN struct timeval *modify;
-       IN int waitfor;
-*/
-static int
-hfs_update(ap)
-       struct vop_update_args /* {
-               struct vnode *a_vp;
-               struct timeval *a_access;
-               struct timeval *a_modify;
-               int a_waitfor;
-       } */ *ap;
+__private_extern__
+int
+hfs_update(struct vnode *vp, __unused int waitfor)
  {
-       struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(ap->a_vp);
+       struct cnode *cp = VTOC(vp);
         struct proc *p;
         struct cat_fork *dataforkp = NULL;
         struct cat_fork *rsrcforkp = NULL;
         struct cat_fork datafork;
-       int updateflag;
+       struct cat_fork rsrcfork;
+       struct hfsmount *hfsmp;
+       int lockflags;
         int error;
  
-       /* XXX do we really want to clear the sytem cnode flags here???? */
-       if ((vp->v_flag & VSYSTEM) ||
-           (VTOVFS(vp)->mnt_flag & MNT_RDONLY) ||
-           (cp->c_mode == 0)) {
-               cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
+       p = current_proc();
+       hfsmp = VTOHFS(vp);
+
+       if (((vnode_issystem(vp) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))) || 
+               hfsmp->hfs_catalog_vp == NULL){
+               return (0);
+       }
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) {
+               cp->c_flag &= ~C_MODIFIED;
+               cp->c_touch_acctime = 0;
+               cp->c_touch_chgtime = 0;
+               cp->c_touch_modtime = 0;
                 return (0);
         }
  
-       updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE);
+       hfs_touchtimes(hfsmp, cp);
  
         /* Nothing to update. */
-       if (updateflag == 0)
+       if ((cp->c_flag & (C_MODIFIED | C_FORCEUPDATE)) == 0) {
                 return (0);
-       /* HFS standard doesn't have access times. */
-       if ((updateflag == C_ACCESS) && (VTOVCB(vp)->vcbSigWord == kHFSSigWord))
-               return (0);
-       if (updateflag & C_ACCESS) {
-               /*
-                * If only the access time is changing then defer
-                * updating it on-disk util later (in hfs_inactive).
-                * If it was recently updated then skip the update.
-                */
-               if (updateflag == C_ACCESS) {
-                       cp->c_flag &= ~C_ACCESS;
-       
-                       /* Its going to disk or its sufficiently newer... */
-                       if ((cp->c_flag & C_ATIMEMOD) ||
-                           (ap->a_access->tv_sec > (cp->c_atime + ATIME_ACCURACY))) {
-                               cp->c_atime = ap->a_access->tv_sec;
-                               cp->c_flag |= C_ATIMEMOD;
-                       }
-                       return (0);
-               } else {
-                       cp->c_atime = ap->a_access->tv_sec;
-               }
-       }
-       if (updateflag & C_UPDATE) {
-               cp->c_mtime = ap->a_modify->tv_sec;
-               cp->c_mtime_nsec = ap->a_modify->tv_usec * 1000;
-       }
-       if (updateflag & C_CHANGE) {
-               cp->c_ctime = time.tv_sec;
-               /*
-                * HFS dates that WE set must be adjusted for DST
-                */
-               if ((VTOVCB(vp)->vcbSigWord == kHFSSigWord) && gTimeZone.tz_dsttime) {
-                       cp->c_ctime += 3600;
-                       cp->c_mtime = cp->c_ctime;
-               }
         }
         
         if (cp->c_datafork)
@@ -2751,25 +3339,32 @@ hfs_update(ap)
         if (cp->c_rsrcfork)
                 rsrcforkp = &cp->c_rsrcfork->ff_data;
  
-       p = current_proc();
-
         /*
          * For delayed allocations updates are
          * postponed until an fsync or the file
          * gets written to disk.
          *
          * Deleted files can defer meta data updates until inactive.
+        *
+        * If we're ever called with the C_FORCEUPDATE flag though
+        * we have to do the update.
          */
-       if (ISSET(cp->c_flag, C_DELETED) ||
+       if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 &&
+           (ISSET(cp->c_flag, C_DELETED) || 
             (dataforkp && cp->c_datafork->ff_unallocblocks) ||
-           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks)) {
-               if (updateflag & (C_CHANGE | C_UPDATE))
-                       hfs_volupdate(VTOHFS(vp), VOL_UPDATE, 0);       
-               cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
+           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) {
+       //      cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
                 cp->c_flag |= C_MODIFIED;
+
+               HFS_KNOTE(vp, NOTE_ATTRIB);
+
                 return (0);
         }
  
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           return error;
+       }
+
         /*
          * For files with invalid ranges (holes) the on-disk
          * field representing the size of the file (cf_size)
@@ -2779,105 +3374,139 @@ hfs_update(ap)
                 bcopy(dataforkp, &datafork, sizeof(datafork));
                 datafork.cf_size = CIRCLEQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
                 dataforkp = &datafork;
+       } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) {
+               // always make sure the block count and the size 
+               // of the file match the number of blocks actually
+               // allocated to the file on disk
+               bcopy(dataforkp, &datafork, sizeof(datafork));
+               // make sure that we don't assign a negative block count
+               if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
+                   panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+                         cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+               }
+               datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
+               datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               dataforkp = &datafork;
+       }
+
+       /*
+        * For resource forks with delayed allocations, make sure
+        * the block count and file size match the number of blocks
+        * actually allocated to the file on disk.
+        */
+       if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) {
+               bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork));
+               rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks);
+               rsrcfork.cf_size   = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               rsrcforkp = &rsrcfork;
         }
  
         /*
          * Lock the Catalog b-tree file.
-        * A shared lock is sufficient since an update doesn't change
-        * the tree and the lock on vp protects the cnode.
          */
-       error = hfs_metafilelocking(VTOHFS(vp), kHFSCatalogFileID, LK_SHARED, p);
-       if (error)
-               return (error);
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
  
         /* XXX - waitfor is not enforced */
-       error = cat_update(VTOHFS(vp), &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp);
+       error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp);
  
-        /* Unlock the Catalog b-tree file. */
-       (void) hfs_metafilelocking(VTOHFS(vp), kHFSCatalogFileID, LK_RELEASE, p);
-
-       if (updateflag & (C_CHANGE | C_UPDATE))
-               hfs_volupdate(VTOHFS(vp), VOL_UPDATE, 0);       
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
         /* After the updates are finished, clear the flags */
-       cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_ATIMEMOD);
+       cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+
+       hfs_end_transaction(hfsmp);
  
+       HFS_KNOTE(vp, NOTE_ATTRIB);
+       
         return (error);
  }
  
  /*
   * Allocate a new node
- *
- * Upon leaving, namei buffer must be freed.
- *
+ * Note - Function does not create and return a vnode for whiteout creation.
   */
  static int
-hfs_makenode(mode, dvp, vpp, cnp)
-       int mode;
-       struct vnode *dvp;
-       struct vnode **vpp;
-       struct componentname *cnp;
+hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
+             struct vnode_attr *vap, vfs_context_t ctx)
  {
-       struct cnode *cp;
+       struct cnode *cp = NULL;
         struct cnode *dcp;
         struct vnode *tvp;
         struct hfsmount *hfsmp;
-       struct timeval tv;
-       struct proc *p;
         struct cat_desc in_desc, out_desc;
         struct cat_attr attr;
-       int error;
+       struct timeval tv;
+       int lockflags;
+       int error, started_tr = 0;
         enum vtype vnodetype;
+       int mode;
  
-       p = cnp->cn_proc;
         dcp = VTOC(dvp);
+       if ((error = hfs_lock(dcp, HFS_EXCLUSIVE_LOCK)))
+               return (error);
+
+       dcp->c_flag |= C_DIR_MODIFICATION;
+       
         hfsmp = VTOHFS(dvp);
         *vpp = NULL;
         tvp = NULL;
-       bzero(&out_desc, sizeof(out_desc));
+       out_desc.cd_flags = 0;
+       out_desc.cd_nameptr = NULL;
  
-       if ((mode & S_IFMT) == 0)
-               mode |= S_IFREG;
-       vnodetype = IFTOVT(mode);
+       vnodetype = vap->va_type;
+       if (vnodetype == VNON)
+               vnodetype = VREG;
+       mode = MAKEIMODE(vnodetype, vap->va_mode);
  
-       /* Check if unmount in progress */
-       if (VTOVFS(dvp)->mnt_kern_flag & MNTK_UNMOUNT) {
-               error = EPERM;
-               goto exit;
-       }
         /* Check if were out of usable disk space. */
-       if ((suser(cnp->cn_cred, NULL) != 0) && (hfs_freeblks(hfsmp, 1) <= 0)) {
+       if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) {
                 error = ENOSPC;
                 goto exit;
         }
  
+       microtime(&tv);
+
         /* Setup the default attributes */
         bzero(&attr, sizeof(attr));
         attr.ca_mode = mode;
-       attr.ca_nlink = vnodetype == VDIR ? 2 : 1;
-       attr.ca_mtime = time.tv_sec;
-       attr.ca_mtime_nsec = time.tv_usec * 1000;
-       if ((VTOVCB(dvp)->vcbSigWord == kHFSSigWord) && gTimeZone.tz_dsttime) {
-               attr.ca_mtime += 3600;  /* Same as what hfs_update does */
-       }
-       attr.ca_atime = attr.ca_ctime = attr.ca_itime = attr.ca_mtime;
-       if (VTOVFS(dvp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) {
-               attr.ca_uid = hfsmp->hfs_uid;
-               attr.ca_gid = hfsmp->hfs_gid;
+       attr.ca_linkcount = 1;
+       if (VATTR_IS_ACTIVE(vap, va_rdev)) {
+               attr.ca_rdev = vap->va_rdev;
+       }
+       if (VATTR_IS_ACTIVE(vap, va_create_time)) {
+               VATTR_SET_SUPPORTED(vap, va_create_time);
+               attr.ca_itime = vap->va_create_time.tv_sec;
         } else {
-               if (vnodetype == VLNK)
-                       attr.ca_uid = dcp->c_uid;
-               else
-                       attr.ca_uid = cnp->cn_cred->cr_uid;
-               attr.ca_gid = dcp->c_gid;
+               attr.ca_itime = tv.tv_sec;
         }
-       /*
-        * Don't tag as a special file (BLK or CHR) until *after*
-        * hfs_getnewvnode is called.  This insures that any
-        * alias checking is defered until hfs_mknod completes.
+       if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
+               attr.ca_itime += 3600;  /* Same as what hfs_update does */
+       }
+       attr.ca_atime = attr.ca_ctime = attr.ca_mtime = attr.ca_itime;
+       attr.ca_atimeondisk = attr.ca_atime;
+       if (VATTR_IS_ACTIVE(vap, va_flags)) {
+               VATTR_SET_SUPPORTED(vap, va_flags);
+               attr.ca_flags = vap->va_flags;
+       }
+       
+       /* 
+        * HFS+ only: all files get ThreadExists
+        * HFSX only: dirs get HasFolderCount
          */
-       if (vnodetype == VBLK || vnodetype == VCHR)
-               attr.ca_mode = (attr.ca_mode & ~S_IFMT) | S_IFREG;
+       if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
+               if (vnodetype == VDIR) {
+                       if (hfsmp->hfs_flags & HFS_FOLDERCOUNT)
+                               attr.ca_recflags = kHFSHasFolderCountMask;
+               } else {
+                       attr.ca_recflags = kHFSThreadExistsMask;
+               }
+       }
+
+       attr.ca_uid = vap->va_uid;
+       attr.ca_gid = vap->va_gid;
+       VATTR_SET_SUPPORTED(vap, va_mode);
+       VATTR_SET_SUPPORTED(vap, va_uid);
+       VATTR_SET_SUPPORTED(vap, va_gid);
  
         /* Tag symlinks with a type and creator. */
         if (vnodetype == VLNK) {
@@ -2887,134 +3516,304 @@ hfs_makenode(mode, dvp, vpp, cnp)
                 fip->fdType    = SWAP_BE32(kSymLinkFileType);
                 fip->fdCreator = SWAP_BE32(kSymLinkCreator);
         }
-       if ((attr.ca_mode & S_ISGID) &&
-           !groupmember(dcp->c_gid, cnp->cn_cred) &&
-           suser(cnp->cn_cred, NULL)) {
-               attr.ca_mode &= ~S_ISGID;
-       }
         if (cnp->cn_flags & ISWHITEOUT)
                 attr.ca_flags |= UF_OPAQUE;
  
         /* Setup the descriptor */
-       bzero(&in_desc, sizeof(in_desc));
-       in_desc.cd_nameptr = cnp->cn_nameptr;
+       in_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
         in_desc.cd_namelen = cnp->cn_namelen;
-       in_desc.cd_parentcnid = dcp->c_cnid;
+       in_desc.cd_parentcnid = dcp->c_fileid;
         in_desc.cd_flags = S_ISDIR(mode) ? CD_ISDIR : 0;
+       in_desc.cd_hint = dcp->c_childhint;
+       in_desc.cd_encoding = 0;
  
-       /* Lock catalog b-tree */
-       error = hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_EXCLUSIVE, p);
-       if (error)
-               goto exit;
+       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+           goto exit;
+       }
+       started_tr = 1;
  
-       error = cat_create(hfsmp, &in_desc, &attr, &out_desc);
+       // have to also lock the attribute file because cat_create() needs
+       // to check that any fileID it wants to use does not have orphaned
+       // attributes in it.
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
  
-       /* Unlock catalog b-tree */
-       (void) hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_RELEASE, p);              
+       /* Reserve some space in the Catalog file. */
+       if ((error = cat_preflight(hfsmp, CAT_CREATE, NULL, 0))) {
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               goto exit;
+       }
+       error = cat_create(hfsmp, &in_desc, &attr, &out_desc);
+       if (error == 0) {
+               /* Update the parent directory */
+               dcp->c_childhint = out_desc.cd_hint;    /* Cache directory's location */
+               dcp->c_entries++;
+               if (vnodetype == VDIR) {
+                       INC_FOLDERCOUNT(hfsmp, dcp->c_attr);
+               }
+               dcp->c_dirchangecnt++;
+               dcp->c_ctime = tv.tv_sec;
+               dcp->c_mtime = tv.tv_sec;
+               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+               HFS_KNOTE(dvp, NOTE_ATTRIB);
+       }
+       hfs_systemfile_unlock(hfsmp, lockflags);
         if (error)
                 goto exit;
         
-       /* Update the parent directory */
-       dcp->c_childhint = out_desc.cd_hint;    /* Cache directory's location */
-       dcp->c_nlink++;
-       dcp->c_entries++;
-       dcp->c_flag |= C_CHANGE | C_UPDATE;
-       tv = time;
-       (void) VOP_UPDATE(dvp, &tv, &tv, 0);
+       /* Invalidate negative cache entries in the directory */
+       if (dcp->c_flag & C_NEG_ENTRIES) {
+               cache_purge_negatives(dvp);
+               dcp->c_flag &= ~C_NEG_ENTRIES;
+       }
+
+       if (vnodetype == VDIR) {
+               HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
+       } else {
+               HFS_KNOTE(dvp, NOTE_WRITE);
+       };
+
         hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE,
                 (dcp->c_cnid == kHFSRootFolderID));
  
-       /* Create a vnode for the object just created: */
-       error = hfs_getnewvnode(hfsmp, NULL, &out_desc, 0, &attr, NULL, &tvp);
+       // XXXdbg
+       // have to end the transaction here before we call hfs_getnewvnode()
+       // because that can cause us to try and reclaim a vnode on a different
+       // file system which could cause us to start a transaction which can
+       // deadlock with someone on that other file system (since we could be
+       // holding two transaction locks as well as various vnodes and we did
+       // not obtain the locks on them in the proper order).
+       //
+       // NOTE: this means that if the quota check fails or we have to update
+       //       the change time on a block-special device that those changes
+       //       will happen as part of independent transactions.
+       //
+       if (started_tr) {
+           hfs_end_transaction(hfsmp);
+           started_tr = 0;
+       }
+
+       /* Do not create vnode for whiteouts */
+       if (S_ISWHT(mode)) {
+               goto exit;
+       }
+
+       /*
+        * Create a vnode for the object just created.
+        *
+        * The cnode is locked on successful return.
+        */
+       error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, GNV_CREATE, &attr, NULL, &tvp);
         if (error)
                 goto exit;
  
-#if QUOTA
         cp = VTOC(tvp);
+#if QUOTA
         /* 
          * We call hfs_chkiq with FORCE flag so that if we
          * fall through to the rmdir we actually have 
          * accounted for the inode
         */
-       if ((error = hfs_getinoquota(cp)) ||
-           (error = hfs_chkiq(cp, 1, cnp->cn_cred, FORCE))) {
-               if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
-                       FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
+       if (hfsmp->hfs_flags & HFS_QUOTAS) {
+               if ((error = hfs_getinoquota(cp)) ||
+                   (error = hfs_chkiq(cp, 1, vfs_context_ucred(ctx), FORCE))) {
+       
+                       if (vnode_isdir(tvp))
+                               (void) hfs_removedir(dvp, tvp, cnp, 0);
+                       else {
+                               hfs_unlock(cp);
+                               hfs_lock_truncate(cp, TRUE);
+                               hfs_lock(cp, HFS_FORCE_LOCK);
+                               (void) hfs_removefile(dvp, tvp, cnp, 0, 0, 0);
+                               hfs_unlock_truncate(cp, TRUE);
+                       }
+                       /*
+                        * we successfully allocated a new vnode, but
+                        * the quota check is telling us we're beyond
+                        * our limit, so we need to dump our lock + reference
+                        */
+                       hfs_unlock(cp);
+                       vnode_put(tvp);
+       
+                       goto exit;
                 }
-               if (tvp->v_type == VDIR)
-                       VOP_RMDIR(dvp,tvp, cnp);
-               else
-                       VOP_REMOVE(dvp,tvp, cnp);
-               return (error);
         }
  #endif /* QUOTA */
  
+       *vpp = tvp;
+exit:
+       cat_releasedesc(&out_desc);
+
         /*
-        * restore vtype and mode for VBLK and VCHR
+        * Check if a file is located in the "Cleanup At Startup"
+        * directory.  If it is then tag it as NODUMP so that we
+        * can be lazy about zero filling data holes.
          */
-       if (vnodetype == VBLK || vnodetype == VCHR) {
-               struct cnode *cp;
+       if ((error == 0) && dvp && (vnodetype == VREG) &&
+           (dcp->c_desc.cd_nameptr != NULL) &&
+           (strncmp((const char *)dcp->c_desc.cd_nameptr,
+                    CARBON_TEMP_DIR_NAME,
+                    sizeof(CARBON_TEMP_DIR_NAME)) == 0)) {
+               struct vnode *ddvp;
  
-               cp = VTOC(tvp);
-               cp->c_mode = mode;
-               tvp->v_type = IFTOVT(mode);
-               cp->c_flag |= C_CHANGE;
-               tv = time;
-               if ((error = VOP_UPDATE(tvp, &tv, &tv, 1))) {
-                       vput(tvp);
-                       goto exit;
-               }
-       }
+               dcp->c_flag &= ~C_DIR_MODIFICATION;
+               wakeup((caddr_t)&dcp->c_flag);
  
-       *vpp = tvp;
-exit:
-       cat_releasedesc(&out_desc);
+               hfs_unlock(dcp);
+               dvp = NULL;
  
-       if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
-               FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
-       vput(dvp);
+               /*
+                * The parent of "Cleanup At Startup" should
+                * have the ASCII name of the userid.
+                */
+               if (hfs_vget(hfsmp, dcp->c_parentcnid, &ddvp, 0) == 0) {
+                       if (VTOC(ddvp)->c_desc.cd_nameptr) {
+                               uid_t uid;
+
+                               uid = strtoul((const char *)VTOC(ddvp)->c_desc.cd_nameptr, 0, 0);
+                               if ((uid == cp->c_uid) ||
+                                   (uid == vfs_context_ucred(ctx)->cr_uid)) {
+                                       cp->c_flags |= UF_NODUMP;
+                                       cp->c_touch_chgtime = TRUE;
+                               }
+                       }
+                       hfs_unlock(VTOC(ddvp));
+                       vnode_put(ddvp);
+               }
+       }
+       if (dvp) {
+               dcp->c_flag &= ~C_DIR_MODIFICATION;
+               wakeup((caddr_t)&dcp->c_flag);
+               
+               hfs_unlock(dcp);
+       }
+       if (error == 0 && cp != NULL) {
+               hfs_unlock(cp);
+       }
+       if (started_tr) {
+           hfs_end_transaction(hfsmp);
+           started_tr = 0;
+       }
  
         return (error);
  }
  
  
-static int
-hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, struct proc *p)
+/*
+ * Return a referenced vnode for the resource fork
+ *
+ * cnode for vnode vp must already be locked.
+ *
+ * can_drop_lock is true if it's safe to temporarily drop/re-acquire the cnode lock
+ */
+__private_extern__
+int
+hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int can_drop_lock)
  {
         struct vnode *rvp;
+       struct vnode *dvp = NULLVP;
         struct cnode *cp = VTOC(vp);
         int error;
+       int vid;
  
+restart:
+       /* Attempt to use existing vnode */
         if ((rvp = cp->c_rsrc_vp)) {
-               /* Use exising vnode */
-               error = vget(rvp, 0, p);
+               vid = vnode_vid(rvp);
+
+               /*
+                * It is not safe to hold the cnode lock when calling vnode_getwithvid()
+                * for the alternate fork -- vnode_getwithvid() could deadlock waiting
+                * for a VL_WANTTERM while another thread has an iocount on the alternate
+                * fork vnode and is attempting to acquire the common cnode lock.
+                *
+                * But it's also not safe to drop the cnode lock when we're holding
+                * multiple cnode locks, like during a hfs_removefile() operation
+                * since we could lock out of order when re-acquiring the cnode lock.
+                *
+                * So we can only drop the lock here if its safe to drop it -- which is
+                * most of the time with the exception being hfs_removefile().
+                */
+               if (can_drop_lock)
+                       hfs_unlock(cp);
+
+               error = vnode_getwithvid(rvp, vid);
+
+               if (can_drop_lock) {
+                       (void) hfs_lock(cp, HFS_FORCE_LOCK);
+                       /*
+                        * When our lock was relinquished, the resource fork
+                        * could have been recycled.  Check for this and try
+                        * again.
+                        */
+                       if (error == ENOENT)
+                               goto restart;
+               }
                 if (error) {
-                       char * name = VTOC(vp)->c_desc.cd_nameptr;
+                       const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr;
  
                         if (name)
-                               printf("hfs_vgetrsrc: couldn't get"
-                                       " resource fork for %s\n", name);
+                               printf("hfs_vgetrsrc: couldn't get resource"
+                                      " fork for %s, err %d\n", name, error);
                         return (error);
                 }
         } else {
                 struct cat_fork rsrcfork;
+               struct componentname cn;
+               int lockflags;
  
-               /* Lock catalog b-tree */
-               error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p);
-               if (error)
-                       return (error);
+               /*
+                * Make sure cnode lock is exclusive, if not upgrade it.
+                *
+                * We assume that we were called from a read-only VNOP (getattr)
+                * and that its safe to have the cnode lock dropped and reacquired.
+                */
+               if (cp->c_lockowner != current_thread()) {
+                       if (!can_drop_lock)
+                               return (EINVAL);
+                       /*
+                        * If the upgrade fails we lose the lock and
+                        * have to take the exclusive lock on our own.
+                        */
+                       if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
+                               lck_rw_lock_exclusive(&cp->c_rwlock);
+                       cp->c_lockowner = current_thread();
+               }
+
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
  
                 /* Get resource fork data */
                 error = cat_lookup(hfsmp, &cp->c_desc, 1, (struct cat_desc *)0,
-                               (struct cat_attr *)0, &rsrcfork);
+                               (struct cat_attr *)0, &rsrcfork, NULL);
  
-               /* Unlock the Catalog */
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+               hfs_systemfile_unlock(hfsmp, lockflags);
                 if (error)
                         return (error);
                 
-               error = hfs_getnewvnode(hfsmp, cp, &cp->c_desc, 1, &cp->c_attr,
-                                       &rsrcfork, &rvp);
+               /*
+                * Supply hfs_getnewvnode with a component name. 
+                */
+               cn.cn_pnbuf = NULL;
+               if (cp->c_desc.cd_nameptr) {
+                       MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+                       cn.cn_nameiop = LOOKUP;
+                       cn.cn_flags = ISLASTCN | HASBUF;
+                       cn.cn_context = NULL;
+                       cn.cn_pnlen = MAXPATHLEN;
+                       cn.cn_nameptr = cn.cn_pnbuf;
+                       cn.cn_hash = 0;
+                       cn.cn_consume = 0;
+                       cn.cn_namelen = snprintf(cn.cn_nameptr, MAXPATHLEN,
+                                                "%s%s", cp->c_desc.cd_nameptr,
+                                                _PATH_RSRCFORKSPEC);
+               }
+               dvp = vnode_getparent(vp);
+               error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL,
+                                       &cp->c_desc, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr,
+                                       &rsrcfork, &rvp);
+               if (dvp)
+                       vnode_put(dvp);
+               if (cn.cn_pnbuf)
+                       FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
                 if (error)
                         return (error);
         }
@@ -3024,23 +3823,205 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, stru
  }
  
  
+static void
+filt_hfsdetach(struct knote *kn)       /* removes kn from its cnode's c_knotes list */
+{
+       struct vnode *vp;
+       
+       vp = (struct vnode *)kn->kn_hook;       /* vnode pointer stored in kn_hook by hfs_vnop_kqfiltadd */
+       if (vnode_getwithvid(vp, kn->kn_hookid))        /* nonzero return: give up without touching the cnode */
+               return;
+
+       if (1) {  /* ! KNDETACH_VNLOCKED */
+               if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) {      /* detach only if the cnode lock was obtained */
+                       (void) KNOTE_DETACH(&VTOC(vp)->c_knotes, kn);
+                       hfs_unlock(VTOC(vp));
+               }
+       }
+
+       vnode_put(vp);  /* pairs with the successful vnode_getwithvid above */
+}
+
+/*ARGSUSED*/
+static int
+filt_hfsread(struct knote *kn, long hint)      /* read filter: kn_data = fork size minus current file offset */
+{
+       struct vnode *vp = (struct vnode *)kn->kn_hook;
+       int dropvp = 0; /* set when this call took its own vnode reference */
+
+       if (hint == 0)  {       /* hint of 0: revalidate the vnode against the saved vid */
+               if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) {
+                       hint = NOTE_REVOKE;     /* could not get the vnode: handle as a revoke */
+               } else 
+                       dropvp = 1;
+       }
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);     /* dropvp is 0 whenever hint was 0 on entry and this path is taken */
+       }
+
+       /* poll(2) semantics dictate always saying there is data */
+       if (!(kn->kn_flags & EV_POLL)) {
+               off_t amount;
+ 
+               amount = VTOF(vp)->ff_size - kn->kn_fp->f_fglob->fg_offset;     /* negative when the offset is past ff_size */
+               if (amount > (off_t)INTPTR_MAX) /* clamp into [INTPTR_MIN, INTPTR_MAX] before storing */
+                       kn->kn_data = INTPTR_MAX;
+               else if (amount < (off_t)INTPTR_MIN)
+                       kn->kn_data = INTPTR_MIN;
+               else
+                       kn->kn_data = (intptr_t)amount;
+       } else {
+               kn->kn_data = 1;
+       }
+
+       if  (dropvp)
+               vnode_put(vp);
+
+       return (kn->kn_data != 0);      /* nonzero result only when kn_data is nonzero */
+}
+
+/*ARGSUSED*/
+static int
+filt_hfswrite(struct knote *kn, long hint)     /* write filter: always returns 1 with kn_data = 0 */
+{
+       struct vnode *vp = (struct vnode *)kn->kn_hook;
+
+       if (hint == 0)  {       /* hint of 0: revalidate the vnode against the saved vid */
+               if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) {
+                       hint = NOTE_REVOKE;     /* could not get the vnode: handle as a revoke */
+               } else 
+                       vnode_put(vp);  /* reference was only needed for the validity check */
+       }
+       if (hint == NOTE_REVOKE) {
+               /*
+                * filesystem is gone, so set the EOF flag and schedule 
+                * the knote for deletion.
+                */
+               kn->kn_data = 0;
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);
+       }
+       kn->kn_data = 0;
+       return (1);
+}
+
+static int
+filt_hfsvnode(struct knote *kn, long hint)     /* vnode filter: accumulates matching hints into kn_fflags */
+{
+       struct vnode *vp = (struct vnode *)kn->kn_hook;
+
+       if (hint == 0)  {       /* hint of 0: revalidate the vnode against the saved vid */
+               if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) {
+                       hint = NOTE_REVOKE;     /* could not get the vnode: handle as a revoke */
+               } else
+                       vnode_put(vp);  /* reference was only needed for the validity check */
+       }
+       if (kn->kn_sfflags & hint)      /* any overlap with kn_sfflags: OR the whole hint into kn_fflags */
+               kn->kn_fflags |= hint;
+       if ((hint == NOTE_REVOKE)) {    /* revoke always fires, whether or not it was recorded above */
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);
+       }
+       
+       return (kn->kn_fflags != 0);    /* nonzero result once any event bit has been recorded */
+}
+
+static struct filterops hfsread_filtops =      /* installed by hfs_vnop_kqfiltadd for EVFILT_READ */
+       { 1, NULL, filt_hfsdetach, filt_hfsread };      /* NOTE(review): positional fields presumably {isfd, attach, detach, event} -- confirm against struct filterops */
+static struct filterops hfswrite_filtops =     /* installed by hfs_vnop_kqfiltadd for EVFILT_WRITE */
+       { 1, NULL, filt_hfsdetach, filt_hfswrite };
+static struct filterops hfsvnode_filtops =     /* installed by hfs_vnop_kqfiltadd for EVFILT_VNODE */
+       { 1, NULL, filt_hfsdetach, filt_hfsvnode };
+
+/*
+ * Add a kqueue filter: select the filterops for kn->kn_filter and attach the knote to the vnode's cnode.
+ */
+static int
+hfs_vnop_kqfiltadd(
+       struct vnop_kqfilt_add_args /* {
+               struct vnode *a_vp;
+               struct knote *a_kn;
+               struct proc *p;
+               vfs_context_t a_context;
+       } */ *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct knote *kn = ap->a_kn;
+       int error;
+
+       switch (kn->kn_filter) {
+       case EVFILT_READ:
+               if (vnode_isreg(vp)) {
+                       kn->kn_fop = &hfsread_filtops;
+               } else {
+                       return EINVAL;  /* read filter is rejected unless vnode_isreg(vp) */
+               };
+               break;
+       case EVFILT_WRITE:
+               if (vnode_isreg(vp)) {
+                       kn->kn_fop = &hfswrite_filtops;
+               } else {
+                       return EINVAL;  /* write filter is rejected unless vnode_isreg(vp) */
+               };
+               break;
+       case EVFILT_VNODE:
+               kn->kn_fop = &hfsvnode_filtops; /* no vnode-type restriction for this filter */
+               break;
+       default:
+               return (1);     /* NOTE(review): literal 1 rather than a named errno such as EINVAL -- confirm intent */
+       }
+
+       kn->kn_hook = (caddr_t)vp;      /* the filt_hfs* routines read the vnode back from kn_hook */
+       kn->kn_hookid = vnode_vid(vp);  /* saved vid, later passed to vnode_getwithvid by the filters */
+
+       if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))   /* lock failure is returned; kn_fop/kn_hook stay set */
+               return (error);
+       KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn);  /* list is modified with the cnode locked exclusively */
+       hfs_unlock(VTOC(vp));
+
+       return (0);
+}
+
+/*
+ * Remove a kqueue filter (not implemented here: ap is unused and ENOTSUP is always returned)
+ */
+static int
+hfs_vnop_kqfiltremove(ap)
+       struct vnop_kqfilt_remove_args /* {
+               struct vnode *a_vp;
+               uintptr_t ident;
+               vfs_context_t a_context;
+       } */__unused *ap;
+{
+       int result;
+
+       result = ENOTSUP; /* XXX */
+       
+       return (result);
+}
+
  /*
   * Wrapper for special device reads
   */
  static int
  hfsspec_read(ap)
-       struct vop_read_args /* {
+       struct vnop_read_args /* {
                 struct vnode *a_vp;
                 struct uio *a_uio;
                 int  a_ioflag;
-               struct ucred *a_cred;
+               vfs_context_t a_context;
         } */ *ap;
  {
         /*
          * Set access flag.
          */
-       VTOC(ap->a_vp)->c_flag |= C_ACCESS;
-       return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
+       VTOC(ap->a_vp)->c_touch_acctime = TRUE;
+       return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap));
  }
  
  /*
@@ -3048,18 +4029,19 @@ hfsspec_read(ap)
   */
  static int
  hfsspec_write(ap)
-       struct vop_write_args /* {
+       struct vnop_write_args /* {
                 struct vnode *a_vp;
                 struct uio *a_uio;
                 int  a_ioflag;
-               struct ucred *a_cred;
+               vfs_context_t a_context;
         } */ *ap;
  {
         /*
          * Set update and change flags.
          */
-       VTOC(ap->a_vp)->c_flag |= C_CHANGE | C_UPDATE;
-       return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
+       VTOC(ap->a_vp)->c_touch_chgtime = TRUE;
+       VTOC(ap->a_vp)->c_touch_modtime = TRUE;
+       return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap));
  }
  
  /*
@@ -3069,21 +4051,23 @@ hfsspec_write(ap)
   */
  static int
  hfsspec_close(ap)
-       struct vop_close_args /* {
+       struct vnop_close_args /* {
                 struct vnode *a_vp;
                 int  a_fflag;
-               struct ucred *a_cred;
-               struct proc *a_p;
+               vfs_context_t a_context;
         } */ *ap;
  {
         struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
+       struct cnode *cp;
  
-       simple_lock(&vp->v_interlock);
-       if (ap->a_vp->v_usecount > 1)
-               CTIMES(cp, &time, &time);
-       simple_unlock(&vp->v_interlock);
-       return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
+       if (vnode_isinuse(ap->a_vp, 1)) {
+               if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) {
+                       cp = VTOC(vp);
+                       hfs_touchtimes(VTOHFS(vp), cp);
+                       hfs_unlock(cp);
+               }
+       }
+       return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap));
  }
  
  #if FIFO
@@ -3092,20 +4076,18 @@ hfsspec_close(ap)
   */
  static int
  hfsfifo_read(ap)
-       struct vop_read_args /* {
+       struct vnop_read_args /* {
                 struct vnode *a_vp;
                 struct uio *a_uio;
                 int  a_ioflag;
-               struct ucred *a_cred;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       extern int (**fifo_vnodeop_p)(void *);
-
         /*
          * Set access flag.
          */
-       VTOC(ap->a_vp)->c_flag |= C_ACCESS;
-       return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
+       VTOC(ap->a_vp)->c_touch_acctime = TRUE;
+       return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_read), ap));
  }
  
  /*
@@ -3113,20 +4095,19 @@ hfsfifo_read(ap)
   */
  static int
  hfsfifo_write(ap)
-       struct vop_write_args /* {
+       struct vnop_write_args /* {
                 struct vnode *a_vp;
                 struct uio *a_uio;
                 int  a_ioflag;
-               struct ucred *a_cred;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       extern int (**fifo_vnodeop_p)(void *);
-
         /*
          * Set update and change flags.
          */
-       VTOC(ap->a_vp)->c_flag |= C_CHANGE | C_UPDATE;
-       return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
+       VTOC(ap->a_vp)->c_touch_chgtime = TRUE;
+       VTOC(ap->a_vp)->c_touch_modtime = TRUE;
+       return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_write), ap));
  }
  
  /*
@@ -3136,112 +4117,213 @@ hfsfifo_write(ap)
   */
  static int
  hfsfifo_close(ap)
-       struct vop_close_args /* {
+       struct vnop_close_args /* {
                 struct vnode *a_vp;
                 int  a_fflag;
-               struct ucred *a_cred;
-               struct proc *a_p;
+               vfs_context_t a_context;
         } */ *ap;
  {
-       extern int (**fifo_vnodeop_p)(void *);
         struct vnode *vp = ap->a_vp;
-       struct cnode *cp = VTOC(vp);
+       struct cnode *cp;
+
+       if (vnode_isinuse(ap->a_vp, 1)) {
+               if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) {
+                       cp = VTOC(vp);
+                       hfs_touchtimes(VTOHFS(vp), cp);
+                       hfs_unlock(cp);
+               }
+       }
+       return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap));
+}
+
+/*
+ * kqfilt_add wrapper for fifos.
+ *
+ * Tries the fifo vnode operation first; if it returns an error, falls back to hfs_vnop_kqfiltadd.
+ */
+int
+hfsfifo_kqfilt_add(ap)
+       struct vnop_kqfilt_add_args *ap;
+{
+       int error;
  
-       simple_lock(&vp->v_interlock);
-       if (ap->a_vp->v_usecount > 1)
-               CTIMES(cp, &time, &time);
-       simple_unlock(&vp->v_interlock);
-       return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap);
+       if (error)      /* any nonzero fifo result triggers the HFS fallback */
+               error = hfs_vnop_kqfiltadd(ap);
+       return (error); /* result of whichever handler ran last */
+}
+
+/*
+ * kqfilt_remove wrapper for fifos.
+ *
+ * Tries the fifo vnode operation first; if it returns an error, falls back to hfs_vnop_kqfiltremove.
+ */
+int
+hfsfifo_kqfilt_remove(ap)
+       struct vnop_kqfilt_remove_args *ap;
+{
+       int error;
+
+       error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap);
+       if (error)      /* any nonzero fifo result triggers the HFS fallback */
+               error = hfs_vnop_kqfiltremove(ap);
+       return (error); /* result of whichever handler ran last */
  }
+
  #endif /* FIFO */
  
+/*
+ * Synchronize a file's in-core state with that on disk.
+ */
+static int
+hfs_vnop_fsync(ap)
+       struct vnop_fsync_args /* {
+               struct vnode *a_vp;
+               int a_waitfor;
+               vfs_context_t a_context;
+       } */ *ap;
+{
+       struct vnode* vp = ap->a_vp;
+       int error;
+
+       /*
+        * We need to allow ENOENT lock errors since the unlink
+        * system call can call VNOP_FSYNC during vclean.
+        */
+       error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+       if (error)
+               return (0);     /* every hfs_lock failure, not only ENOENT, is reported as success */
+
+       error = hfs_fsync(vp, ap->a_waitfor, 0, vfs_context_proc(ap->a_context));
  
-/*****************************************************************************
-*
-*      VOP Tables
-*
-*****************************************************************************/
-int hfs_cache_lookup();        /* in hfs_lookup.c */
-int hfs_lookup();      /* in hfs_lookup.c */
-int hfs_read();                /* in hfs_readwrite.c */
-int hfs_write();       /* in hfs_readwrite.c */
-int hfs_ioctl();       /* in hfs_readwrite.c */
-int hfs_select();      /* in hfs_readwrite.c */
-int hfs_bmap();                /* in hfs_readwrite.c */
-int hfs_strategy();    /* in hfs_readwrite.c */
-int hfs_truncate();    /* in hfs_readwrite.c */
-int hfs_allocate();    /* in hfs_readwrite.c */
-int hfs_pagein();      /* in hfs_readwrite.c */
-int hfs_pageout();     /* in hfs_readwrite.c */
-int hfs_search();      /* in hfs_search.c */
-int hfs_bwrite();      /* in hfs_readwrite.c */
-int hfs_link();                /* in hfs_link.c */
-int hfs_blktooff();    /* in hfs_readwrite.c */
-int hfs_offtoblk();    /* in hfs_readwrite.c */
-int hfs_cmap();                /* in hfs_readwrite.c */
-int hfs_getattrlist(); /* in hfs_attrlist.c */
-int hfs_setattrlist(); /* in hfs_attrlist.c */
-int hfs_readdirattr(); /* in hfs_attrlist.c */
-int hfs_inactive();    /* in hfs_cnode.c */
-int hfs_reclaim();     /* in hfs_cnode.c */
+       hfs_unlock(VTOC(vp));
+       return (error); /* result of hfs_fsync */
+}
+
+
+static int
+hfs_vnop_whiteout(ap)  /* whiteout VNOP: dispatches on a_flags (LOOKUP, CREATE or DELETE) */
+       struct vnop_whiteout_args /* {
+               struct vnode *a_dvp;
+               struct componentname *a_cnp;
+               int a_flags;
+               vfs_context_t a_context;
+       } */ *ap;
+{
+       int error = 0;
+       struct vnode *vp = NULL;
+       struct vnode_attr va;
+       struct vnop_lookup_args lookup_args;
+       struct vnop_remove_args remove_args;
+       struct hfsmount *hfsmp;
+
+       hfsmp = VTOHFS(ap->a_dvp);
+       if (hfsmp->hfs_flags & HFS_STANDARD) {  /* refused with ENOTSUP when the mount has HFS_STANDARD set */
+               error = ENOTSUP;
+               goto exit;
+       }
+
+       switch (ap->a_flags) {
+               case LOOKUP:    /* nothing to do: report success */
+                       error = 0;
+                       break;
+
+               case CREATE:    /* make a node with type VREG, mode S_IFWHT, uid 0 and gid 0 */
+                       VATTR_INIT(&va);
+                       VATTR_SET(&va, va_type, VREG);
+                       VATTR_SET(&va, va_mode, S_IFWHT);
+                       VATTR_SET(&va, va_uid, 0);
+                       VATTR_SET(&va, va_gid, 0);
+                       
+                       error = hfs_makenode(ap->a_dvp, &vp, ap->a_cnp, &va, ap->a_context);
+                       /* No need to release the vnode as no vnode is created for whiteouts */
+                       break;
+
+               case DELETE:    /* look the name up in a_dvp, then remove the vnode found */
+                       lookup_args.a_dvp = ap->a_dvp;
+                       lookup_args.a_vpp = &vp;
+                       lookup_args.a_cnp = ap->a_cnp;
+                       lookup_args.a_context = ap->a_context;
+
+                       error = hfs_vnop_lookup(&lookup_args);
+                       if (error) {    /* lookup failed: return its error without calling remove */
+                               break;
+                       }
+                       
+                       remove_args.a_dvp = ap->a_dvp;
+                       remove_args.a_vp = vp;
+                       remove_args.a_cnp = ap->a_cnp;
+                       remove_args.a_flags = 0;
+                       remove_args.a_context = ap->a_context;
+
+                       error = hfs_vnop_remove(&remove_args);
+                       vnode_put(vp);  /* release the vnode returned by the lookup, whatever remove returned */
+                       break;
+
+               default:        /* any other a_flags value panics */
+                       panic("hfs_vnop_whiteout: unknown operation (flag = %x)\n", ap->a_flags);
+       };
+       
+exit:
+       return (error);
+}
  
  int (**hfs_vnodeop_p)(void *);
  
  #define VOPFUNC int (*)(void *)
  
  struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
-    { &vop_default_desc, (VOPFUNC)vn_default_error },
-    { &vop_lookup_desc, (VOPFUNC)hfs_cache_lookup },           /* lookup */
-    { &vop_create_desc, (VOPFUNC)hfs_create },                 /* create */
-    { &vop_mknod_desc, (VOPFUNC)hfs_mknod },                   /* mknod */
-    { &vop_open_desc, (VOPFUNC)hfs_open },                     /* open */
-    { &vop_close_desc, (VOPFUNC)hfs_close },                   /* close */
-    { &vop_access_desc, (VOPFUNC)hfs_access },                 /* access */
-    { &vop_getattr_desc, (VOPFUNC)hfs_getattr },               /* getattr */
-    { &vop_setattr_desc, (VOPFUNC)hfs_setattr },               /* setattr */
-    { &vop_read_desc, (VOPFUNC)hfs_read },                     /* read */
-    { &vop_write_desc, (VOPFUNC)hfs_write },                   /* write */
-    { &vop_ioctl_desc, (VOPFUNC)hfs_ioctl },                   /* ioctl */
-    { &vop_select_desc, (VOPFUNC)hfs_select },                 /* select */
-    { &vop_exchange_desc, (VOPFUNC)hfs_exchange },             /* exchange */
-    { &vop_mmap_desc, (VOPFUNC)err_mmap },                     /* mmap */
-    { &vop_fsync_desc, (VOPFUNC)hfs_fsync },                   /* fsync */
-    { &vop_seek_desc, (VOPFUNC)nop_seek },                     /* seek */
-    { &vop_remove_desc, (VOPFUNC)hfs_remove },                 /* remove */
-    { &vop_link_desc, (VOPFUNC)hfs_link },                     /* link */
-    { &vop_rename_desc, (VOPFUNC)hfs_rename },                 /* rename */
-    { &vop_mkdir_desc, (VOPFUNC)hfs_mkdir },                   /* mkdir */
-    { &vop_rmdir_desc, (VOPFUNC)hfs_rmdir },                   /* rmdir */
-    { &vop_mkcomplex_desc, (VOPFUNC)err_mkcomplex },           /* mkcomplex */
-    { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist },  /* getattrlist */
-    { &vop_setattrlist_desc, (VOPFUNC)hfs_setattrlist },  /* setattrlist */
-    { &vop_symlink_desc, (VOPFUNC)hfs_symlink },               /* symlink */
-    { &vop_readdir_desc, (VOPFUNC)hfs_readdir },               /* readdir */
-    { &vop_readdirattr_desc, (VOPFUNC)hfs_readdirattr },  /* readdirattr */
-    { &vop_readlink_desc, (VOPFUNC)hfs_readlink },             /* readlink */
-    { &vop_abortop_desc, (VOPFUNC)hfs_abortop },               /* abortop */
-    { &vop_inactive_desc, (VOPFUNC)hfs_inactive },             /* inactive */
-    { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim },               /* reclaim */
-    { &vop_lock_desc, (VOPFUNC)hfs_lock },                     /* lock */
-    { &vop_unlock_desc, (VOPFUNC)hfs_unlock },                 /* unlock */
-    { &vop_bmap_desc, (VOPFUNC)hfs_bmap },                     /* bmap */
-    { &vop_strategy_desc, (VOPFUNC)hfs_strategy },             /* strategy */
-    { &vop_print_desc, (VOPFUNC)hfs_print },                   /* print */
-    { &vop_islocked_desc, (VOPFUNC)hfs_islocked },             /* islocked */
-    { &vop_pathconf_desc, (VOPFUNC)hfs_pathconf },             /* pathconf */
-    { &vop_advlock_desc, (VOPFUNC)hfs_advlock },               /* advlock */
-    { &vop_reallocblks_desc, (VOPFUNC)err_reallocblks },  /* reallocblks */
-    { &vop_truncate_desc, (VOPFUNC)hfs_truncate },             /* truncate */
-    { &vop_allocate_desc, (VOPFUNC)hfs_allocate },             /* allocate */
-    { &vop_update_desc, (VOPFUNC)hfs_update },                 /* update */
-    { &vop_searchfs_desc, (VOPFUNC)hfs_search },               /* search fs */
-    { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite },                 /* bwrite */
-    { &vop_pagein_desc, (VOPFUNC)hfs_pagein },                 /* pagein */
-    { &vop_pageout_desc,(VOPFUNC) hfs_pageout },               /* pageout */
-    { &vop_copyfile_desc, (VOPFUNC)err_copyfile },             /* copyfile */
-    { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },             /* blktooff */
-    { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },             /* offtoblk */
-    { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                     /* cmap */
+    { &vnop_default_desc, (VOPFUNC)vn_default_error },
+    { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup },           /* lookup */
+    { &vnop_create_desc, (VOPFUNC)hfs_vnop_create },           /* create */
+    { &vnop_mknod_desc, (VOPFUNC)hfs_vnop_mknod },             /* mknod */
+    { &vnop_open_desc, (VOPFUNC)hfs_vnop_open },                       /* open */
+    { &vnop_close_desc, (VOPFUNC)hfs_vnop_close },             /* close */
+    { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr },         /* getattr */
+    { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr },         /* setattr */
+    { &vnop_read_desc, (VOPFUNC)hfs_vnop_read },                       /* read */
+    { &vnop_write_desc, (VOPFUNC)hfs_vnop_write },             /* write */
+    { &vnop_ioctl_desc, (VOPFUNC)hfs_vnop_ioctl },             /* ioctl */
+    { &vnop_select_desc, (VOPFUNC)hfs_vnop_select },           /* select */
+    { &vnop_revoke_desc, (VOPFUNC)nop_revoke },                        /* revoke */
+    { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange },               /* exchange */
+    { &vnop_mmap_desc, (VOPFUNC)err_mmap },                    /* mmap */
+    { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync },             /* fsync */
+    { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove },           /* remove */
+    { &vnop_link_desc, (VOPFUNC)hfs_vnop_link },                       /* link */
+    { &vnop_rename_desc, (VOPFUNC)hfs_vnop_rename },           /* rename */
+    { &vnop_mkdir_desc, (VOPFUNC)hfs_vnop_mkdir },             /* mkdir */
+    { &vnop_rmdir_desc, (VOPFUNC)hfs_vnop_rmdir },             /* rmdir */
+    { &vnop_symlink_desc, (VOPFUNC)hfs_vnop_symlink },         /* symlink */
+    { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir },         /* readdir */
+    { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */
+    { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink },               /* readlink */
+    { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive },               /* inactive */
+    { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim },         /* reclaim */
+    { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy },               /* strategy */
+    { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf },               /* pathconf */
+    { &vnop_advlock_desc, (VOPFUNC)err_advlock },              /* advlock */
+    { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate },               /* allocate */
+    { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search },         /* search fs */
+    { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite },           /* bwrite */
+    { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein },           /* pagein */
+    { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout },         /* pageout */
+    { &vnop_copyfile_desc, (VOPFUNC)err_copyfile },            /* copyfile */
+    { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff },               /* blktooff */
+    { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk },               /* offtoblk */
+    { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap },                       /* blockmap */
+    { &vnop_kqfilt_add_desc, (VOPFUNC)hfs_vnop_kqfiltadd },            /* kqfilt_add */
+    { &vnop_kqfilt_remove_desc, (VOPFUNC)hfs_vnop_kqfiltremove },              /* kqfilt_remove */
+    { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr},
+    { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr},
+    { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr},
+    { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr},
+    { &vnop_whiteout_desc, (VOPFUNC)hfs_vnop_whiteout},
+#if NAMEDSTREAMS
+    { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream },
+    { &vnop_makenamedstream_desc, (VOPFUNC)hfs_vnop_makenamedstream },
+    { &vnop_removenamedstream_desc, (VOPFUNC)hfs_vnop_removenamedstream },
+#endif
      { NULL, (VOPFUNC)NULL }
  };
  
@@ -3250,56 +4332,40 @@ struct vnodeopv_desc hfs_vnodeop_opv_desc =
  
  int (**hfs_specop_p)(void *);
  struct vnodeopv_entry_desc hfs_specop_entries[] = {
-       { &vop_default_desc, (VOPFUNC)vn_default_error },
-       { &vop_lookup_desc, (VOPFUNC)spec_lookup },             /* lookup */
-       { &vop_create_desc, (VOPFUNC)spec_create },             /* create */
-       { &vop_mknod_desc, (VOPFUNC)spec_mknod },               /* mknod */
-       { &vop_open_desc, (VOPFUNC)spec_open },                 /* open */
-       { &vop_close_desc, (VOPFUNC)hfsspec_close },            /* close */
-       { &vop_access_desc, (VOPFUNC)hfs_access },              /* access */
-       { &vop_getattr_desc, (VOPFUNC)hfs_getattr },            /* getattr */
-       { &vop_setattr_desc, (VOPFUNC)hfs_setattr },            /* setattr */
-       { &vop_read_desc, (VOPFUNC)hfsspec_read },              /* read */
-       { &vop_write_desc, (VOPFUNC)hfsspec_write },            /* write */
-       { &vop_lease_desc, (VOPFUNC)spec_lease_check },         /* lease */
-       { &vop_ioctl_desc, (VOPFUNC)spec_ioctl },               /* ioctl */
-       { &vop_select_desc, (VOPFUNC)spec_select },             /* select */
-       { &vop_revoke_desc, (VOPFUNC)spec_revoke },             /* revoke */
-       { &vop_mmap_desc, (VOPFUNC)spec_mmap },                 /* mmap */
-       { &vop_fsync_desc, (VOPFUNC)hfs_fsync },                /* fsync */
-       { &vop_seek_desc, (VOPFUNC)spec_seek },                 /* seek */
-       { &vop_remove_desc, (VOPFUNC)spec_remove },             /* remove */
-       { &vop_link_desc, (VOPFUNC)spec_link },                 /* link */
-       { &vop_rename_desc, (VOPFUNC)spec_rename },             /* rename */
-       { &vop_mkdir_desc, (VOPFUNC)spec_mkdir },               /* mkdir */
-       { &vop_rmdir_desc, (VOPFUNC)spec_rmdir },               /* rmdir */
-       { &vop_symlink_desc, (VOPFUNC)spec_symlink },           /* symlink */
-       { &vop_readdir_desc, (VOPFUNC)spec_readdir },           /* readdir */
-       { &vop_readlink_desc, (VOPFUNC)spec_readlink },         /* readlink */
-       { &vop_abortop_desc, (VOPFUNC)spec_abortop },           /* abortop */
-       { &vop_inactive_desc, (VOPFUNC)hfs_inactive },          /* inactive */
-       { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim },            /* reclaim */
-       { &vop_lock_desc, (VOPFUNC)hfs_lock },                  /* lock */
-       { &vop_unlock_desc, (VOPFUNC)hfs_unlock },              /* unlock */
-       { &vop_bmap_desc, (VOPFUNC)spec_bmap },                 /* bmap */
-       { &vop_strategy_desc, (VOPFUNC)spec_strategy },         /* strategy */
-       { &vop_print_desc, (VOPFUNC)hfs_print },                /* print */
-       { &vop_islocked_desc, (VOPFUNC)hfs_islocked },          /* islocked */
-       { &vop_pathconf_desc, (VOPFUNC)spec_pathconf },         /* pathconf */
-       { &vop_advlock_desc, (VOPFUNC)spec_advlock },           /* advlock */
-       { &vop_blkatoff_desc, (VOPFUNC)spec_blkatoff },         /* blkatoff */
-       { &vop_valloc_desc, (VOPFUNC)spec_valloc },             /* valloc */
-       { &vop_reallocblks_desc, (VOPFUNC)spec_reallocblks },   /* reallocblks */
-       { &vop_vfree_desc, (VOPFUNC)err_vfree },                /* vfree */
-       { &vop_truncate_desc, (VOPFUNC)spec_truncate },         /* truncate */
-       { &vop_update_desc, (VOPFUNC)hfs_update },              /* update */
-       { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite },
-       { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */
-       { &vop_pagein_desc, (VOPFUNC)hfs_pagein },              /* Pagein */
-       { &vop_pageout_desc, (VOPFUNC)hfs_pageout },            /* Pageout */
-        { &vop_copyfile_desc, (VOPFUNC)err_copyfile },         /* copyfile */
-       { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },          /* blktooff */
-       { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },          /* offtoblk */
+       { &vnop_default_desc, (VOPFUNC)vn_default_error },
+       { &vnop_lookup_desc, (VOPFUNC)spec_lookup },            /* lookup */
+       { &vnop_create_desc, (VOPFUNC)spec_create },            /* create */
+       { &vnop_mknod_desc, (VOPFUNC)spec_mknod },              /* mknod */
+       { &vnop_open_desc, (VOPFUNC)spec_open },                        /* open */
+       { &vnop_close_desc, (VOPFUNC)hfsspec_close },           /* close */
+       { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr },      /* getattr */
+       { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr },      /* setattr */
+       { &vnop_read_desc, (VOPFUNC)hfsspec_read },             /* read */
+       { &vnop_write_desc, (VOPFUNC)hfsspec_write },           /* write */
+       { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },              /* ioctl */
+       { &vnop_select_desc, (VOPFUNC)spec_select },            /* select */
+       { &vnop_revoke_desc, (VOPFUNC)spec_revoke },            /* revoke */
+       { &vnop_mmap_desc, (VOPFUNC)spec_mmap },                        /* mmap */
+       { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync },          /* fsync */
+       { &vnop_remove_desc, (VOPFUNC)spec_remove },            /* remove */
+       { &vnop_link_desc, (VOPFUNC)spec_link },                        /* link */
+       { &vnop_rename_desc, (VOPFUNC)spec_rename },            /* rename */
+       { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir },              /* mkdir */
+       { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir },              /* rmdir */
+       { &vnop_symlink_desc, (VOPFUNC)spec_symlink },          /* symlink */
+       { &vnop_readdir_desc, (VOPFUNC)spec_readdir },          /* readdir */
+       { &vnop_readlink_desc, (VOPFUNC)spec_readlink },                /* readlink */
+       { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive },    /* inactive */
+       { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim },      /* reclaim */
+       { &vnop_strategy_desc, (VOPFUNC)spec_strategy },                /* strategy */
+       { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },                /* pathconf */
+       { &vnop_advlock_desc, (VOPFUNC)err_advlock },           /* advlock */
+       { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite },
+       { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein },                /* Pagein */
+       { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout },      /* Pageout */
+        { &vnop_copyfile_desc, (VOPFUNC)err_copyfile },                /* copyfile */
+       { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff },    /* blktooff */
+       { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk },    /* offtoblk */
         { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL }
  };
  struct vnodeopv_desc hfs_specop_opv_desc =
@@ -3308,56 +4374,43 @@ struct vnodeopv_desc hfs_specop_opv_desc =
  #if FIFO
  int (**hfs_fifoop_p)(void *);
  struct vnodeopv_entry_desc hfs_fifoop_entries[] = {
-       { &vop_default_desc, (VOPFUNC)vn_default_error },
-       { &vop_lookup_desc, (VOPFUNC)fifo_lookup },             /* lookup */
-       { &vop_create_desc, (VOPFUNC)fifo_create },             /* create */
-       { &vop_mknod_desc, (VOPFUNC)fifo_mknod },               /* mknod */
-       { &vop_open_desc, (VOPFUNC)fifo_open },                 /* open */
-       { &vop_close_desc, (VOPFUNC)hfsfifo_close },            /* close */
-       { &vop_access_desc, (VOPFUNC)hfs_access },              /* access */
-       { &vop_getattr_desc, (VOPFUNC)hfs_getattr },            /* getattr */
-       { &vop_setattr_desc, (VOPFUNC)hfs_setattr },            /* setattr */
-       { &vop_read_desc, (VOPFUNC)hfsfifo_read },              /* read */
-       { &vop_write_desc, (VOPFUNC)hfsfifo_write },            /* write */
-       { &vop_lease_desc, (VOPFUNC)fifo_lease_check },         /* lease */
-       { &vop_ioctl_desc, (VOPFUNC)fifo_ioctl },               /* ioctl */
-       { &vop_select_desc, (VOPFUNC)fifo_select },             /* select */
-       { &vop_revoke_desc, (VOPFUNC)fifo_revoke },             /* revoke */
-       { &vop_mmap_desc, (VOPFUNC)fifo_mmap },                 /* mmap */
-       { &vop_fsync_desc, (VOPFUNC)hfs_fsync },                /* fsync */
-       { &vop_seek_desc, (VOPFUNC)fifo_seek },                 /* seek */
-       { &vop_remove_desc, (VOPFUNC)fifo_remove },             /* remove */
-       { &vop_link_desc, (VOPFUNC)fifo_link },                 /* link */
-       { &vop_rename_desc, (VOPFUNC)fifo_rename },             /* rename */
-       { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir },               /* mkdir */
-       { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir },               /* rmdir */
-       { &vop_symlink_desc, (VOPFUNC)fifo_symlink },           /* symlink */
-       { &vop_readdir_desc, (VOPFUNC)fifo_readdir },           /* readdir */
-       { &vop_readlink_desc, (VOPFUNC)fifo_readlink },         /* readlink */
-       { &vop_abortop_desc, (VOPFUNC)fifo_abortop },           /* abortop */
-       { &vop_inactive_desc, (VOPFUNC)hfs_inactive },          /* inactive */
-       { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim },            /* reclaim */
-       { &vop_lock_desc, (VOPFUNC)hfs_lock },                  /* lock */
-       { &vop_unlock_desc, (VOPFUNC)hfs_unlock },              /* unlock */
-       { &vop_bmap_desc, (VOPFUNC)fifo_bmap },                 /* bmap */
-       { &vop_strategy_desc, (VOPFUNC)fifo_strategy },         /* strategy */
-       { &vop_print_desc, (VOPFUNC)hfs_print },                /* print */
-       { &vop_islocked_desc, (VOPFUNC)hfs_islocked },          /* islocked */
-       { &vop_pathconf_desc, (VOPFUNC)fifo_pathconf },         /* pathconf */
-       { &vop_advlock_desc, (VOPFUNC)fifo_advlock },           /* advlock */
-       { &vop_blkatoff_desc, (VOPFUNC)fifo_blkatoff },         /* blkatoff */
-       { &vop_valloc_desc, (VOPFUNC)fifo_valloc },             /* valloc */
-       { &vop_reallocblks_desc, (VOPFUNC)fifo_reallocblks },   /* reallocblks */
-       { &vop_vfree_desc, (VOPFUNC)err_vfree },                /* vfree */
-       { &vop_truncate_desc, (VOPFUNC)fifo_truncate },         /* truncate */
-       { &vop_update_desc, (VOPFUNC)hfs_update },              /* update */
-       { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite },
-       { &vop_pagein_desc, (VOPFUNC)hfs_pagein },              /* Pagein */
-       { &vop_pageout_desc, (VOPFUNC)hfs_pageout },            /* Pageout */
-    { &vop_copyfile_desc, (VOPFUNC)err_copyfile },             /* copyfile */
-       { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff },          /* blktooff */
-       { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk },          /* offtoblk */
-       { &vop_cmap_desc, (VOPFUNC)hfs_cmap },                  /* cmap */
+       { &vnop_default_desc, (VOPFUNC)vn_default_error },      /* default */
+       { &vnop_lookup_desc, (VOPFUNC)fifo_lookup },            /* lookup */
+       { &vnop_create_desc, (VOPFUNC)fifo_create },            /* create */
+       { &vnop_mknod_desc, (VOPFUNC)fifo_mknod },              /* mknod */
+       { &vnop_open_desc, (VOPFUNC)fifo_open },                        /* open */
+       { &vnop_close_desc, (VOPFUNC)hfsfifo_close },           /* close */
+       { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr },      /* getattr */
+       { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr },      /* setattr */
+       { &vnop_read_desc, (VOPFUNC)hfsfifo_read },             /* read */
+       { &vnop_write_desc, (VOPFUNC)hfsfifo_write },           /* write */
+       { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl },              /* ioctl */
+       { &vnop_select_desc, (VOPFUNC)fifo_select },            /* select */
+       { &vnop_revoke_desc, (VOPFUNC)fifo_revoke },            /* revoke */
+       { &vnop_mmap_desc, (VOPFUNC)fifo_mmap },                        /* mmap */
+       { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync },          /* fsync */
+       { &vnop_remove_desc, (VOPFUNC)fifo_remove },            /* remove */
+       { &vnop_link_desc, (VOPFUNC)fifo_link },                        /* link */
+       { &vnop_rename_desc, (VOPFUNC)fifo_rename },            /* rename */
+       { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir },              /* mkdir */
+       { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir },              /* rmdir */
+       { &vnop_symlink_desc, (VOPFUNC)fifo_symlink },          /* symlink */
+       { &vnop_readdir_desc, (VOPFUNC)fifo_readdir },          /* readdir */
+       { &vnop_readlink_desc, (VOPFUNC)fifo_readlink },                /* readlink */
+       { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive },    /* inactive */
+       { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim },      /* reclaim */
+       { &vnop_strategy_desc, (VOPFUNC)fifo_strategy },                /* strategy */
+       { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf },                /* pathconf */
+       { &vnop_advlock_desc, (VOPFUNC)err_advlock },           /* advlock */
+       { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite },        /* bwrite */
+       { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein },                /* Pagein */
+       { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout },      /* Pageout */
+       { &vnop_copyfile_desc, (VOPFUNC)err_copyfile },                 /* copyfile */
+       { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff },    /* blktooff */
+       { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk },    /* offtoblk */
+       { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap },            /* blockmap */
+       { &vnop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add },  /* kqfilt_add */
+       { &vnop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove },  /* kqfilt_remove */
         { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL }
  };
  struct vnodeopv_desc hfs_fifoop_opv_desc =