xnu-2050.48.11.tar.gz

[apple/xnu.git] / bsd / hfs / hfs_cnode.c
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c

index b0ba0f1aefdb1e3032fa6efde9cd9510d2f56b45..65f2825d058fda86300b9b80c076fbf42b26ab0f 100644 (file)
--- a/bsd/hfs/hfs_cnode.c
+++ b/bsd/hfs/hfs_cnode.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -36,6 +36,8 @@
  #include <sys/ubc.h>
  #include <sys/quota.h>
  #include <sys/kdebug.h>
+#include <libkern/OSByteOrder.h>
+#include <sys/buf_internal.h>
  
  #include <kern/locks.h>
  
@@ -46,6 +48,7 @@
  #include <hfs/hfs_catalog.h>
  #include <hfs/hfs_cnode.h>
  #include <hfs/hfs_quota.h>
+#include <hfs/hfs_format.h>
  
  extern int prtactive;
  
@@ -53,276 +56,697 @@ extern lck_attr_t *  hfs_lock_attr;
  extern lck_grp_t *  hfs_mutex_group;
  extern lck_grp_t *  hfs_rwlock_group;
  
-static int  hfs_filedone(struct vnode *vp, vfs_context_t context);
-
  static void  hfs_reclaim_cnode(struct cnode *);
-
+static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
  static int hfs_isordered(struct cnode *, struct cnode *);
  
+extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
+
+__inline__ int hfs_checkdeleted (struct cnode *cp) {
+       return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);  
+}
  
  /*
- * Last reference to an cnode.  If necessary, write or delete it.
+ * Function used by a special fcntl() that decorates a cnode/vnode to
+ * indicate that it is backing another filesystem, like a disk image.
+ *
+ * the argument 'val' indicates whether or not to set the bit in the cnode flags
+ * 
+ * Returns non-zero on failure. 0 on success 
   */
-__private_extern__
-int
-hfs_vnop_inactive(struct vnop_inactive_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct cnode *cp;
-       struct hfsmount *hfsmp = VTOHFS(vp);
-       struct proc *p = vfs_context_proc(ap->a_context);
-       int error = 0;
-       int recycle = 0;
-       int forkcount = 0;
-       int truncated = 0;
-       int started_tr = 0;
-       int took_trunc_lock = 0;
-       cat_cookie_t cookie;
-       int cat_reserve = 0;
-       int lockflags;
-       enum vtype v_type;
-
-       v_type = vnode_vtype(vp);
+int hfs_set_backingstore (struct vnode *vp, int val) {
+       struct cnode *cp = NULL;
+       int err = 0;
+       
         cp = VTOC(vp);
+       if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
+               return EINVAL;
+       }
  
-       if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
-           (hfsmp->hfs_freezing_proc == p)) {
-               return (0);
+       /* lock the cnode */
+       err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
+       if (err) {
+               return err;
+       }
+       
+       if (val) {
+               cp->c_flag |= C_BACKINGSTORE;
+       }
+       else {
+               cp->c_flag &= ~C_BACKINGSTORE;
         }
  
-       /*
-        * Ignore nodes related to stale file handles.
-        */
-       if (cp->c_mode == 0) {
-               vnode_recycle(vp);
-               return (0);
+       /* unlock everything */
+       hfs_unlock (cp);
+
+       return err;
+}
+
+/*
+ * Function used by a special fcntl() that checks to see if a cnode/vnode
+ * indicates it is backing another filesystem, like a disk image.
+ *
+ * the argument 'val' is an output argument for whether or not the bit is set
+ * 
+ * Returns non-zero on failure. 0 on success 
+ */
+
+int hfs_is_backingstore (struct vnode *vp, int *val) {
+       struct cnode *cp = NULL;
+       int err = 0;
+
+       if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
+               *val = 0;
+               return 0;
         }
  
-       if ((v_type == VREG || v_type == VLNK)) {
-               hfs_lock_truncate(cp, TRUE);
-               took_trunc_lock = 1;
+       cp = VTOC(vp);
+
+       /* lock the cnode */
+       err = hfs_lock (cp, HFS_SHARED_LOCK);
+       if (err) {
+               return err;
         }
  
-       (void) hfs_lock(cp, HFS_FORCE_LOCK);
+       if (cp->c_flag & C_BACKINGSTORE) {
+               *val = 1;
+       }       
+       else {
+               *val = 0;
+       }
+
+       /* unlock everything */
+       hfs_unlock (cp);
  
+       return err;
+}
+
+
+/*
+ * hfs_cnode_teardown
+ *
+ * This is an internal function that is invoked from both hfs_vnop_inactive
+ * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called for vnodes
+ * being recycled and reclaimed, it is important that we do any post-processing
+ * necessary for the cnode in both places.  Important tasks include things such as
+ * releasing the blocks from an open-unlinked file when all references to it have dropped,
+ * and handling resource forks separately from data forks.
+ *
+ * Note that we take only the vnode as an argument here (rather than the cnode).
+ * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
+ * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
+ * vnode we need to reclaim if only the cnode is supplied. 
+ *
+ * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
+ * if both are invoked right after the other.  In the second call, most of this function's if()
+ * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.  
+ * As a quick check to see if this function is necessary, determine if the cnode is already
+ * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that 
+ * remain for cnodes marked in such a fashion are to tear down their fork references and
+ * release all directory hints and hardlink origins.  However, both of those are done 
+ * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
+ * entry is no longer there.  
+ *
+ * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
+ * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info 
+ * is totally gone by that point.
+ *
+ * Assumes that both truncate and cnode locks for 'cp' are held.
+ */
+static 
+int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
+       
+       int forkcount = 0;
+       enum vtype v_type;
+       struct cnode *cp;
+       int error = 0;
+       int started_tr = 0;
+       struct hfsmount *hfsmp = VTOHFS(vp);
+       struct proc *p = vfs_context_proc(ctx);
+       int truncated = 0;
+    cat_cookie_t cookie;
+    int cat_reserve = 0;
+    int lockflags;
+       int ea_error = 0;
+       
+       v_type = vnode_vtype(vp);
+       cp = VTOC(vp);
+       
+       if (cp->c_datafork) {
+               ++forkcount;
+       }
+       if (cp->c_rsrcfork) {
+               ++forkcount;
+       }
+       
+       
         /*
-        * We should lock cnode before checking the flags in the 
-        * condition below and should unlock the cnode before calling 
-        * ubc_setsize() as cluster code can call other HFS vnops which
-        * will try to acquire the same cnode lock and cause deadlock.
+        * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
+        * The dirty regions would have already been synced to disk, so informing UBC
+        * that they can toss the pages doesn't help anyone at this point. 
+        * 
+        * Note that this is a performance problem if the vnode goes straight to reclaim
+        * (and skips inactive), since there would be no way for anyone to notify the UBC
+        * that all pages in this file are basically useless.
+        */     
+       if (reclaim == 0) {
+               /*
+                * Check whether we are tearing down a cnode with only one remaining fork.
+                * If there are blocks in its filefork, then we need to unlock the cnode
+                * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
+                * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
+                * panic.  
+                */
+               
+               if ((v_type == VREG || v_type == VLNK) &&
+                       (cp->c_flag & C_DELETED) &&
+                       (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
+                       hfs_unlock(cp); 
+                       /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
+                       ubc_setsize(vp, 0);
+                       (void) hfs_lock(cp, HFS_FORCE_LOCK);
+               }       
+       }
+       
+       /* 
+        * Push file data out for normal files that haven't been evicted from 
+        * the namespace.  We only do this if this function was not called from reclaim,
+        * because by that point the UBC information has been totally torn down.  
+        * 
+        * There should also be no way for a normal file that has NOT been deleted from
+        * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
+        * when the file becomes open-unlinked. 
          */
-       if ((v_type == VREG || v_type == VLNK) &&
-           (cp->c_flag & C_DELETED) &&
-           (VTOF(vp)->ff_blocks != 0)) {
-               hfs_unlock(cp); 
-               ubc_setsize(vp, 0);
-               (void) hfs_lock(cp, HFS_FORCE_LOCK);
+       if ((v_type == VREG) && 
+               (!ISSET(cp->c_flag, C_DELETED)) && 
+               (!ISSET(cp->c_flag, C_NOEXISTS)) &&
+               (VTOF(vp)->ff_blocks) &&
+               (reclaim == 0)) {
+               /* 
+                * Note that if content protection is enabled, then this is where we will
+                * attempt to issue IOs for all dirty regions of this file.  
+                *
+                * If we're called from hfs_vnop_inactive, all this means is that, at the time
+                * the logic for deciding to call this function ran, there were not any lingering
+                * mmap/fd references for this file.  However, there is nothing preventing the system
+                * from creating a new reference in between the time that logic was checked
+                * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
+                * that there aren't any references is during vnop_reclaim.
+                */
+               hfs_filedone(vp, ctx);
         }
  
-       if (v_type == VREG && !ISSET(cp->c_flag, C_DELETED) && VTOF(vp)->ff_blocks) {
-               hfs_filedone(vp, ap->a_context);
+       /* 
+        * We're holding the cnode lock now.  Stall behind any shadow BPs that may
+        * be involved with this vnode if it is a symlink.  We don't want to allow 
+        * the blocks that we're about to release to be put back into the pool if there
+        * is pending I/O to them.
+        */
+       if (v_type == VLNK) {   
+               /* 
+                * This will block if the asynchronous journal flush is in progress.
+                * If this symlink is not being renamed over and doesn't have any open FDs,
+                * then we'll remove it from the journal's bufs below in kill_block.
+                */
+               buf_wait_for_shadow_io (vp, 0);
         }
+
         /* 
          * Remove any directory hints or cached origins
          */
         if (v_type == VDIR) {
                 hfs_reldirhints(cp, 0);
         }
-       
         if (cp->c_flag & C_HARDLINK) {
                 hfs_relorigins(cp);
         }
+       
+       /*
+        * This check is slightly complicated.  We should only truncate data 
+        * in very specific cases for open-unlinked files.  This is because
+        * we want to ensure that the resource fork continues to be available
+        * if the caller has the data fork open.  However, this is not symmetric; 
+        * someone who has the resource fork open need not be able to access the data
+        * fork once the data fork has gone inactive.
+        * 
+        * If we're the last fork, then we have cleaning up to do.
+        * 
+        * A) last fork, and vp == c_vp
+        *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
+        *
+        * B) last fork, and vp == c_rsrc_vp
+        *      Truncate ourselves, assume data fork has been cleaned due to C).
+        *
+        * If we're not the last fork, then things are a little different:
+        *
+        * C) not the last fork, vp == c_vp
+        *      Truncate ourselves.  Once the file has gone out of the namespace,
+        *      it cannot be further opened.  Further access to the rsrc fork may 
+        *      continue, however.
+        *
+        * D) not the last fork, vp == c_rsrc_vp
+        *      Don't enter the block below, just clean up vnode and push it out of core.
+        */
+       
+       if ((v_type == VREG || v_type == VLNK) && 
+               (cp->c_flag & C_DELETED) &&
+               ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
  
-       if (cp->c_datafork)
-               ++forkcount;
-       if (cp->c_rsrcfork)
-               ++forkcount;
-
-       /* If needed, get rid of any fork's data for a deleted file */
-       if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) {
+               /* Truncate away our own fork data. (Case A, B, C above) */
                 if (VTOF(vp)->ff_blocks != 0) {
+
+                       /* 
+                        * SYMLINKS only:
+                        *
+                        * Encapsulate the entire change (including truncating the link) in 
+                        * nested transactions if we are modifying a symlink, because we know that its
+                        * file length will be at most 4k, and we can fit both the truncation and 
+                        * any relevant bitmap changes into a single journal transaction.  We also want
+                        * the kill_block code to execute in the same transaction so that any dirty symlink
+                        * blocks will not be written. Otherwise, rely on
+                        * hfs_truncate doing its own transactions to ensure that we don't blow up
+                        * the journal.
+                        */ 
+                       if ((started_tr == 0) && (v_type == VLNK)) {
+                               if (hfs_start_transaction(hfsmp) != 0) {
+                                       error = EINVAL;
+                                       goto out;
+                               }
+                               else {
+                                       started_tr = 1;
+                               }
+                       }
+
+                       /*
+                        * At this point, we have decided that this cnode is
+                        * suitable for full removal.  We are about to deallocate
+                        * its blocks and remove its entry from the catalog. 
+                        * If it was a symlink, then it's possible that the operation
+                        * which created it is still in the current transaction group
+                        * due to coalescing.  Take action here to kill the data blocks
+                        * of the symlink out of the journal before moving to 
+                        * deallocate the blocks.  We need to be in the middle of
+                        * a transaction before calling buf_iterate like this.
+                        * 
+                        * Note: we have to kill any potential symlink buffers out of 
+                        * the journal prior to deallocating their blocks.  This is so 
+                        * that we don't race with another thread that may be doing an 
+                        * allocation concurrently and pick up these blocks. It could
+                        * generate I/O against them which could go out ahead of our journal
+                        * transaction.
+                        */
+
+                       if (hfsmp->jnl && vnode_islnk(vp)) {
+                               buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
+                       }
+       
                         /*
-                        * Since we're already inside a transaction,
-                        * tell hfs_truncate to skip the ubc_setsize.
+                        * This truncate call (and the one below) is fine from VNOP_RECLAIM's 
+                        * context because we're only removing blocks, not zero-filling new 
+                        * ones.  The C_DELETED check above makes things much simpler. 
                          */
-                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context);
-                       if (error)
+                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
+                       if (error) {
                                 goto out;
+                       }
                         truncated = 1;
-               }
-               recycle = 1;
  
-               /*
-                * Check if there's any resource fork blocks that need to
-                * be reclaimed.  This covers the case where there is a
-                * resource fork but its not in core.
+                       /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
+                       if (started_tr) {
+                               hfs_end_transaction(hfsmp);
+                               started_tr = 0;
+                       }
+               }
+               
+               /* 
+                * Truncate away the resource fork, if we represent the data fork and
+                * it is the last fork.  That means, by definition, the rsrc fork is not in 
+                * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
+                * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
+                * to get rid of the resource fork's data. Note that because we are holding the 
+                * cnode lock, it is impossible for a competing thread to create the resource fork
+                * vnode from underneath us while we do this.
+                * 
+                * This is invoked via case A above only.
                  */
                 if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
-                       struct vnode *rvp = NULLVP;
+                       struct cat_lookup_buffer *lookup_rsrc = NULL;
+                       struct cat_desc *desc_ptr = NULL;
+                       lockflags = 0;
+
+                       MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
+                       if (lookup_rsrc == NULL) {
+                               printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
+                               error = ENOMEM;
+                               goto out;
+                       }
+                       else {
+                               bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
+                       }
  
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
-                       if (error)
+                       if (cp->c_desc.cd_namelen == 0) {
+                               /* Initialize the rsrc descriptor for lookup if necessary */
+                               MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
+                               
+                               lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
+                               lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
+                               lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+                               lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;  
+                               
+                               desc_ptr = &lookup_rsrc->lookup_desc;
+                       }
+                       else {
+                               desc_ptr = &cp->c_desc; 
+                       }
+
+                       lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+                       error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL, 
+                                       (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
+
+                       hfs_systemfile_unlock (hfsmp, lockflags);
+                       
+                       if (error) {
+                               FREE (lookup_rsrc, M_TEMP);
                                 goto out;
+                       }
+
                         /*
-                        * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock().
+                        * Make the filefork in our temporary struct look like a real 
+                        * filefork.  Fill in the cp, sysfileinfo and rangelist fields.
+                        */
+                       rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
+                       lookup_rsrc->lookup_fork.ff_cp = cp;
+
+                       /* 
+                        * If there were no errors, then we have the catalog's fork information 
+                        * for the resource fork in question.  Go ahead and delete the data in it now.
                          */
-                       cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE;
-                       error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ap->a_context);
-                       if (error)
+
+                       error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
+                       FREE(lookup_rsrc, M_TEMP);
+
+                       if (error) {
                                 goto out;
-                       vnode_recycle(rvp);  /* all done with this vnode */
-               }
-       }
+                       }
  
-       // If needed, get rid of any xattrs that this file may have.
-       // Note that this must happen outside of any other transactions
-       // because it starts/ends its own transactions and grabs its
-       // own locks.  This is to prevent a file with a lot of attributes
-       // from creating a transaction that is too large (which panics).
-       //
-       if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && (cp->c_flag & C_DELETED)) {
-               hfs_removeallattr(hfsmp, cp->c_fileid);
+                       /*
+                        * This fileid's resource fork extents have now been fully deleted on-disk
+                        * and this CNID is no longer valid. At this point, we should be able to
+                        * zero out cp->c_blocks to indicate there is no data left in this file.
+                        */
+                       cp->c_blocks = 0;
+               }
         }
-
+       
         /*
-        * Check for a postponed deletion.
-        * (only delete cnode when the last fork goes inactive)
+        * If we represent the last fork (or none in the case of a dir), 
+        * and the cnode has become open-unlinked,
+        * AND it has EA's, then we need to get rid of them.
+        *
+        * Note that this must happen outside of any other transactions
+        * because it starts/ends its own transactions and grabs its
+        * own locks.  This is to prevent a file with a lot of attributes
+        * from creating a transaction that is too large (which panics).
          */
-       if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {                     
-               /*
-                * Mark cnode in transit so that no one can get this 
-                * cnode from cnode hash.
-                */
-               // hfs_chash_mark_in_transit(cp);
-               // XXXdbg - remove the cnode from the hash table since it's deleted
-               //          otherwise someone could go to sleep on the cnode and not
-               //          be woken up until this vnode gets recycled which could be
-               //          a very long time...
-               hfs_chashremove(cp);
-
-               cp->c_flag |= C_NOEXISTS;   // XXXdbg
-               cp->c_rdev = 0;
-
-               if (started_tr == 0) {
-                   if (hfs_start_transaction(hfsmp) != 0) {
-                       error = EINVAL;
-                       goto out;
-                   }
-                   started_tr = 1;
-               }
+    if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
+               (cp->c_flag & C_DELETED) && 
+               (forkcount <= 1)) {
+               
+        ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
+    }
+       
+       
+       /*
+        * If the cnode represented an open-unlinked file, then now
+        * actually remove the cnode's catalog entry and release all blocks
+        * it may have been using.  
+        */
+    if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
+        /*
+         * Mark cnode in transit so that no one can get this 
+         * cnode from cnode hash.
+         */
+               // hfs_chash_mark_in_transit(hfsmp, cp);
+               // XXXdbg - remove the cnode from the hash table since it's deleted
+               //          otherwise someone could go to sleep on the cnode and not
+               //          be woken up until this vnode gets recycled which could be
+               //          a very long time...
+        hfs_chashremove(hfsmp, cp);
+               
+        cp->c_flag |= C_NOEXISTS;   // XXXdbg
+        cp->c_rdev = 0;
+               
+        if (started_tr == 0) {
+            if (hfs_start_transaction(hfsmp) != 0) {
+                               error = EINVAL;
+                               goto out;
+            }
+            started_tr = 1;
+        }
+               
+        /*
+         * Reserve some space in the Catalog file.
+         */
+        if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+            goto out;
+        }
+        cat_reserve = 1;
+               
+        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+               
+        if (cp->c_blocks > 0) {
+            printf("hfs_inactive: deleting non-empty%sfile %d, "
+                   "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+                   (int)cp->c_fileid, (int)cp->c_blocks);
+        }
                 
-               /*
-                * Reserve some space in the Catalog file.
-                */
-               if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
-                       goto out;
-               }
-               cat_reserve = 1;
-
-               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-
-               if (cp->c_blocks > 0) {
-                       printf("hfs_inactive: deleting non-empty%sfile %d, "
-                              "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
-                              (int)cp->c_fileid, (int)cp->c_blocks);
-               }
-
-               //
-               // release the name pointer in the descriptor so that
-               // cat_delete() will use the file-id to do the deletion.
-               // in the case of hard links this is imperative (in the
-               // case of regular files the fileid and cnid are the
-               // same so it doesn't matter).
                 //
-               cat_releasedesc(&cp->c_desc);
+        // release the name pointer in the descriptor so that
+        // cat_delete() will use the file-id to do the deletion.
+        // in the case of hard links this is imperative (in the
+        // case of regular files the fileid and cnid are the
+        // same so it doesn't matter).
+        //
+        cat_releasedesc(&cp->c_desc);
                 
-               /*
-                * The descriptor name may be zero,
-                * in which case the fileid is used.
-                */
-               error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+        /*
+         * The descriptor name may be zero,
+         * in which case the fileid is used.
+         */
+        error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
                 
-               if (error && truncated && (error != ENXIO))
-                       printf("hfs_inactive: couldn't delete a truncated file!");
-
-               /* Update HFS Private Data dir */
-               if (error == 0) {
-                       hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-                       if (vnode_isdir(vp)) {
-                               DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
-                       }
-                       (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
-                               &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
-               }
-
-               hfs_systemfile_unlock(hfsmp, lockflags);
-
-               if (error)
+        if (error && truncated && (error != ENXIO))
+            printf("hfs_inactive: couldn't delete a truncated file!");
+               
+        /* Update HFS Private Data dir */
+        if (error == 0) {
+            hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
+            if (vnode_isdir(vp)) {
+                DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+            }
+            (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+                                                        &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+        }
+               
+        hfs_systemfile_unlock(hfsmp, lockflags);
+               
+        if (error) {                   
                         goto out;
-
+               }
+               
  #if QUOTA
-               if (hfsmp->hfs_flags & HFS_QUOTAS)
-                       (void)hfs_chkiq(cp, -1, NOCRED, 0);
+        if (hfsmp->hfs_flags & HFS_QUOTAS)
+            (void)hfs_chkiq(cp, -1, NOCRED, 0);
  #endif /* QUOTA */
-
-               cp->c_mode = 0;
-               cp->c_flag &= ~C_DELETED;
-               cp->c_touch_chgtime = TRUE;
-               cp->c_touch_modtime = TRUE;
-
-               if (error == 0)
-                       hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
-       }
-
+               
+        /* Already set C_NOEXISTS at the beginning of this block */
+        cp->c_flag &= ~C_DELETED;
+        cp->c_touch_chgtime = TRUE;
+        cp->c_touch_modtime = TRUE;
+               
+        if (error == 0)
+            hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
+    }
+       
         /*
-        * A file may have had delayed allocations, in which case hfs_update
-        * would not have updated the catalog record (cat_update).  We need
-        * to do that now, before we lose our fork data.  We also need to
-        * force the update, or hfs_update will again skip the cat_update.
+     * A file may have had delayed allocations, in which case hfs_update
+     * would not have updated the catalog record (cat_update).  We need
+     * to do that now, before we lose our fork data.  We also need to
+     * force the update, or hfs_update will again skip the cat_update.
+        *
+        * If the file has C_NOEXISTS set, then we can skip the hfs_update call
+        * because the catalog entry has already been removed.  There would be no point
+     * to looking up the entry in the catalog to modify it when we already know it's gone
          */
-       if ((cp->c_flag & C_MODIFIED) ||
-           cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
-               if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
-                       cp->c_flag |= C_FORCEUPDATE;
+    if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
+               ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || 
+                cp->c_touch_chgtime || cp->c_touch_modtime)) {
+                       
+                       if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
+                               cp->c_flag |= C_FORCEUPDATE;
+                       }
+                       hfs_update(vp, 0);
                 }
-               hfs_update(vp, 0);
-       }
+       
  out:
-       if (cat_reserve)
-               cat_postflight(hfsmp, &cookie, p);
+    if (cat_reserve)
+        cat_postflight(hfsmp, &cookie, p);
+       
+    // XXXdbg - have to do this because a goto could have come here
+    if (started_tr) {
+        hfs_end_transaction(hfsmp);
+        started_tr = 0;
+    }
  
-       // XXXdbg - have to do this because a goto could have come here
-       if (started_tr) {
-           hfs_end_transaction(hfsmp);
-           started_tr = 0;
+#if 0
+#if CONFIG_PROTECT
+       /* 
+        * cnode truncate lock and cnode lock are both held exclusive here. 
+        *
+        * Go ahead and flush the keys out if this cnode is the last fork
+        * and it is not class F.  Class F keys should not be purged because they only
+        * exist in memory and have no persistent keys.  Only do this 
+        * if we haven't already done it yet (maybe a vnode skipped inactive 
+        * and went straight to reclaim).  This function gets called from both reclaim and
+        * inactive, so it will happen first in inactive if possible.
+        * 
+        * We need to be mindful that all pending IO for this file has already been
+        * issued and completed before we bzero out the key.  This is because
+        * if it isn't, tossing the key here could result in garbage IO being
+        * written (by using the bzero'd key) if the writes are happening asynchronously.
+        * 
+        * In addition, class A files may have already been purged due to the 
+        * lock event occurring.
+        */
+       if (forkcount == 1) {
+               struct cprotect *entry = cp->c_cpentry;
+               if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
+                       if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
+                               cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
+                               bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
+                               bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
+                       }
+               }
         }
+#endif
+#endif
+       
+       return error;   
+}
  
-       hfs_unlock(cp);
  
-       if (took_trunc_lock)
-           hfs_unlock_truncate(cp, TRUE);
+/*
+ * hfs_vnop_inactive
+ *
+ * The last usecount on the vnode has gone away, so we need to tear down
+ * any remaining data still residing in the cnode.  If necessary, write out
+ * remaining blocks or delete the cnode's entry in the catalog.
+ *
+ * Parameters:
+ *   ap - VNOP_INACTIVE arguments; only a_vp (the vnode) and a_context are used.
+ *
+ * Returns 0 immediately, without taking any locks, if the volume is read-only,
+ * the vnode is a system file, the process of the supplied context is the one
+ * recorded in hfs_freezing_proc, or the cnode is already marked C_NOEXISTS.
+ * Otherwise returns whatever hfs_cnode_teardown returns.
+ *
+ * Locking: takes the truncate lock exclusively (VREG and VLNK vnodes only) and
+ * then the cnode lock around the teardown; both are released before returning.
+ */
+int
+hfs_vnop_inactive(struct vnop_inactive_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp;
+       struct hfsmount *hfsmp = VTOHFS(vp);
+       struct proc *p = vfs_context_proc(ap->a_context);
+       int error = 0;
+       int took_trunc_lock = 0;
+       enum vtype v_type;
+       
+       v_type = vnode_vtype(vp);
+       cp = VTOC(vp);
  
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
+           (hfsmp->hfs_freezing_proc == p)) {
+               error = 0;
+               goto inactive_done;
+       }       
+       
         /*
-        * If we are done with the vnode, reclaim it
-        * so that it can be reused immediately.
+        * For safety, do NOT call vnode_recycle from inside this function.  This can cause 
+        * problems in the following scenario:
+        * 
+        * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
+        * 
+        * If we're being invoked as a result of a reclaim that was already in-flight, then we
+        * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
+        * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
+        * try to re-enter reclaim again and panic.  
+        *
+        * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
+        * 1) last usecount goes away on the vnode (vnode_rele)
+        * 2) last iocount goes away on a vnode that previously had usecounts but didn't have 
+        *              vnode_recycle called (vnode_put)
+        * 3) vclean by way of reclaim
+        *
+        * In this function we would generally want to call vnode_recycle to speed things 
+        * along to ensure that we don't leak blocks due to open-unlinked files.  However, by 
+        * virtue of being in this function already, we can call hfs_cnode_teardown, which 
+        * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that 
+        * there's no entry in the catalog and no backing store anymore.  If that's the case, 
+        * then we really don't care all that much when the vnode actually goes through reclaim.
+        * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
+        * unlinked file in the first place should have already called vnode_recycle on the vnode
+        * to guarantee that it would go through reclaim in a speedy way.
          */
-       if (cp->c_mode == 0 || recycle)
-               vnode_recycle(vp);
-
-       return (error);
+       
+       if (cp->c_flag & C_NOEXISTS) {
+               /* 
+                * If the cnode has already had its cat entry removed, then 
+                * just skip to the end. We don't need to do anything here.
+                */
+               error = 0;
+               goto inactive_done;
+       }
+       
+       if ((v_type == VREG || v_type == VLNK)) {
+               hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+               took_trunc_lock = 1;
+       }
+       
+       (void) hfs_lock(cp, HFS_FORCE_LOCK);
+       
+       /* 
+        * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
+        * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
+        * The final argument is hfs_cnode_teardown's 'reclaim' parameter; it is 0 here
+        * because we are being called from inactive rather than from reclaim.
+        */
+       error = hfs_cnode_teardown (vp, ap->a_context, 0);
+
+    /*
+     * Drop the truncate lock before unlocking the cnode
+     * (which can potentially perform a vnode_put and
+     * recycle the vnode which in turn might require the
+     * truncate lock)
+     */
+       if (took_trunc_lock) {
+           hfs_unlock_truncate(cp, 0);
+       }
+       
+       hfs_unlock(cp);
+       
+inactive_done: 
+       
+       return error;
  }
  
+
  /*
   * File clean-up (zero fill and shrink peof).
   */
-static int
+
+int
  hfs_filedone(struct vnode *vp, vfs_context_t context)
  {
         struct cnode *cp;
         struct filefork *fp;
         struct hfsmount *hfsmp;
+       struct rl_entry *invalid_range;
         off_t leof;
-       u_long blks, blocksize;
+       u_int32_t blks, blocksize;
+       /* flags for zero-filling sparse ranges */
+       int cluster_flags = IO_CLOSE;
+       int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
  
         cp = VTOC(vp);
         fp = VTOF(vp);
@@ -332,16 +756,44 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
                 return (0);
  
+#if CONFIG_PROTECT
+       /* 
+        * Figure out if we need to do synchronous IO. 
+        * 
+        * If the file represents a content-protected file, we may need
+        * to issue synchronous IO when we dispatch to the cluster layer.
+        * If we didn't, then the IO would go out to the disk asynchronously.
+        * If the vnode hits the end of inactive before getting reclaimed, the
+        * content protection keys would be wiped/bzeroed out, and we'd end up
+        * trying to issue the IO with an invalid key.  This will lead to file 
+        * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
+        * have completed (though they may be in the track cache).
+        */
+       if (cp_fs_protected(VTOVFS(vp))) {
+               cluster_flags |= IO_SYNC;
+               cluster_zero_flags |= IO_SYNC;
+       }
+#endif
+
+       /* 
+        * If we are being invoked from F_SWAPDATAEXTENTS, then we 
+        * need to issue synchronous IO; unless we are sure that all 
+        * of the data has been written to the disk, we won't know 
+        * that all of the blocks have been allocated properly.
+        */
+       if (cp->c_flag & C_SWAPINPROGRESS) {
+               cluster_flags |= IO_SYNC;
+       }
+
         hfs_unlock(cp);
-       (void) cluster_push(vp, IO_CLOSE);
+       (void) cluster_push(vp, cluster_flags);
         hfs_lock(cp, HFS_FORCE_LOCK);
  
         /*
          * Explicitly zero out the areas of file
          * that are currently marked invalid.
          */
-       while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) {
-               struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges);
+       while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
                 off_t start = invalid_range->rl_start;
                 off_t end = invalid_range->rl_end;
         
@@ -353,8 +805,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
  
                 hfs_unlock(cp);
                 (void) cluster_write(vp, (struct uio *) 0,
-                                    leof, end + 1, start, (off_t)0,
-                                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
+                                    leof, end + 1, start, (off_t)0, cluster_zero_flags);
                 hfs_lock(cp, HFS_FORCE_LOCK);
                 cp->c_flag |= C_MODIFIED;
         }
@@ -367,10 +818,12 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
         /*
          * Shrink the peof to the smallest size neccessary to contain the leof.
          */
-       if (blks < fp->ff_blocks)
-               (void) hfs_truncate(vp, leof, IO_NDELAY, 0, context);
+       if (blks < fp->ff_blocks) {
+               (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
+       }
+
         hfs_unlock(cp);
-       (void) cluster_push(vp, IO_CLOSE);
+       (void) cluster_push(vp, cluster_flags);
         hfs_lock(cp, HFS_FORCE_LOCK);
         
         /*
@@ -388,7 +841,6 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
  /*
   * Reclaim a cnode so that it can be used for other purposes.
   */
-__private_extern__
  int
  hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
  {
@@ -396,40 +848,31 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
         struct cnode *cp;
         struct filefork *fp = NULL;
         struct filefork *altfp = NULL;
+       struct hfsmount *hfsmp = VTOHFS(vp);
+       vfs_context_t ctx = ap->a_context;
         int reclaim_cnode = 0;
-
-       (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+       int err = 0;
+       enum vtype v_type;
+       
+       v_type = vnode_vtype(vp);
         cp = VTOC(vp);
-
-       /*
-        * Check if a deleted resource fork vnode missed a
-        * VNOP_INACTIVE call and requires truncation.
+       
+       /* 
+        * We don't take the truncate lock since by the time reclaim comes along,
+        * all dirty pages have been synced and nobody should be competing
+        * with us for this thread.
          */
-       if (VNODE_IS_RSRC(vp) &&
-           (cp->c_flag & C_DELETED) &&
-           (VTOF(vp)->ff_blocks != 0)) {
-               hfs_unlock(cp);
-               ubc_setsize(vp, 0);
-
-               hfs_lock_truncate(cp, TRUE);
-               (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
-
-               (void) hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context);
+       (void) hfs_lock (cp, HFS_FORCE_LOCK);
  
-               hfs_unlock_truncate(cp, TRUE);
-       }
-       /*
-        * A file may have had delayed allocations, in which case hfs_update
-        * would not have updated the catalog record (cat_update).  We need
-        * to do that now, before we lose our fork data.  We also need to
-        * force the update, or hfs_update will again skip the cat_update.
+       /* 
+        * Sync to disk any remaining data in the cnode/vnode.  This includes
+        * a call to hfs_update if the cnode has outbound data.
+        * 
+        * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
+        * because the catalog entry for this cnode is already gone.
          */
-       if ((cp->c_flag & C_MODIFIED) ||
-               cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
-               if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
-                       cp->c_flag |= C_FORCEUPDATE;
-               }       
-               hfs_update(vp, 0);
+       if (!ISSET(cp->c_flag, C_NOEXISTS)) {
+               err = hfs_cnode_teardown(vp, ctx, 1);
         }
  
         /*
@@ -459,14 +902,14 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
                 cp->c_rsrcfork = NULL;
                 cp->c_rsrc_vp = NULL;
         } else {
-               panic("hfs_vnop_reclaim: vp points to wrong cnode\n");
+               panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
         }
         /*
          * On the last fork, remove the cnode from its hash chain.
          */
         if (altfp == NULL) {
                 /* If we can't remove it then the cnode must persist! */
-               if (hfs_chashremove(cp) == 0)
+               if (hfs_chashremove(hfsmp, cp) == 0)
                         reclaim_cnode = 1;
                 /* 
                  * Remove any directory hints
@@ -474,11 +917,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
                 if (vnode_isdir(vp)) {
                         hfs_reldirhints(cp, 0);
                 }
-       
-               if (cp->c_flag & C_HARDLINK) {
+               
+               if(cp->c_flag & C_HARDLINK) {
                         hfs_relorigins(cp);
                 }
-
         }
         /* Release the file fork and related data */
         if (fp) {
@@ -493,9 +935,14 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
          * If there was only one active fork then we can release the cnode.
          */
         if (reclaim_cnode) {
-               hfs_chashwakeup(cp, H_ALLOC | H_TRANSIT);
+               hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
                 hfs_reclaim_cnode(cp);
-       } else /* cnode in use */ {
+       } 
+       else  {
+               /* 
+                * cnode in use.  If it is a directory, it could have 
+                * no live forks. Just release the lock.
+                */
                 hfs_unlock(cp);
         }
  
@@ -505,6 +952,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
  
  
  extern int (**hfs_vnodeop_p) (void *);
+extern int (**hfs_std_vnodeop_p) (void *);
  extern int (**hfs_specop_p)  (void *);
  #if FIFO
  extern int (**hfs_fifoop_p)  (void *);
@@ -515,7 +963,6 @@ extern int (**hfs_fifoop_p)  (void *);
   *
   * The vnode is returned with an iocount and the cnode locked
   */
-__private_extern__
  int
  hfs_getnewvnode(
         struct hfsmount *hfsmp,
@@ -525,7 +972,8 @@ hfs_getnewvnode(
         int flags,
         struct cat_attr *attrp,
         struct cat_fork *forkp,
-       struct vnode **vpp)
+       struct vnode **vpp,
+       int *out_flags)
  {
         struct mount *mp = HFSTOVFS(hfsmp);
         struct vnode *vp = NULL;
@@ -533,14 +981,18 @@ hfs_getnewvnode(
         struct vnode *tvp = NULLVP;
         struct cnode *cp = NULL;
         struct filefork *fp = NULL;
+       int hfs_standard = 0;
         int retval;
         int issystemfile;
         int wantrsrc;
+       int hflags = 0;
         struct vnode_fsparam vfsp;
         enum vtype vtype;
  #if QUOTA
         int i;
  #endif /* QUOTA */
+       
+       hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
  
         if (attrp->ca_fileid == 0) {
                 *vpp = NULL;
@@ -557,6 +1009,16 @@ hfs_getnewvnode(
         issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
         wantrsrc = flags & GNV_WANTRSRC;
  
+       /* Sanity check the vtype and mode */
+       if (vtype == VBAD) {
+               /* Mark the FS as corrupt and bail out */
+               hfs_mark_volume_inconsistent(hfsmp);
+               return (EINVAL);
+       }
+
+       /* Zero out the out_flags */
+       *out_flags = 0;
+
  #ifdef HFS_CHECK_LOCK_ORDER
         /*
          * The only case were its permissible to hold the parent cnode
@@ -573,7 +1035,8 @@ hfs_getnewvnode(
         /*
          * Get a cnode (new or existing)
          */
-       cp = hfs_chash_getcnode(hfsmp->hfs_raw_dev, attrp->ca_fileid, vpp, wantrsrc, (flags & GNV_SKIPLOCK));
+       cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, 
+                                                       (flags & GNV_SKIPLOCK), out_flags, &hflags);
  
         /*
          * If the id is no longer valid for lookups we'll get back a NULL cp.
@@ -582,27 +1045,123 @@ hfs_getnewvnode(
                 return (ENOENT);
         }
  
-       /* Hardlinks may need an updated catalog descriptor */
-       if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
-               replace_desc(cp, descp);
+       /* 
+        * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the 
+        * descriptor in the cnode as needed if the cnode represents a hardlink.  
+        * We want the caller to get the most up-to-date copy of the descriptor
+        * as possible. However, we only do anything here if there was a valid vnode.
+        * If there isn't a vnode, then the cnode is brand new and needs to be initialized
+        * as it doesn't have a descriptor or cat_attr yet.
+        * 
+        * If we are about to replace the descriptor with the user-supplied one, then validate
+        * that the descriptor correctly acknowledges this item is a hardlink.  We could be
+        * subject to a race where the calling thread invoked cat_lookup, got a valid lookup 
+        * result but the file was not yet a hardlink. With sufficient delay between there
+        * and here, we might accidentally copy in the raw inode ID into the descriptor in the
+        * call below.  If the descriptor's CNID is the same as the fileID then it must
+        * not yet have been a hardlink when the lookup occurred.
+        */
+       
+       if (!(hfs_checkdeleted(cp))) {
+               if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
+                                       /* If cnode is uninitialized, its c_attr will be zeroed out; cnids won't match. */
+                       if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
+                                       (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
+                               if ((flags & GNV_SKIPLOCK) == 0) {
+                                       /* 
+                                        * Then we took the lock. Drop it before calling
+                                        * vnode_put, which may invoke hfs_vnop_inactive and need to take 
+                                        * the cnode lock again.
+                                        */
+                                       hfs_unlock(cp);
+                               }
+                               
+                               /* 
+                                * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to 
+                                * force a re-drive in the lookup routine.  
+                                * Drop the iocount on the vnode obtained from 
+                                * chash_getcnode if needed.
+                                */     
+                               if (*vpp != NULL) {
+                                       vnode_put (*vpp);
+                                       *vpp = NULL;
+                               }
+                               
+                               /*
+                                * If we raced with VNOP_RECLAIM for this vnode, the hash code could
+                                * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
+                                * the hash code peeks at those fields without holding the cnode lock because
+                                * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
+                                * call above.  Since we're bailing out, unset whatever flags we just set, and
+                                * wake up all waiters for this cnode.
+                                */
+                               if (hflags) {
+                                       hfs_chashwakeup(hfsmp, cp, hflags);
+                               }
+                               
+                               *out_flags = GNV_CAT_ATTRCHANGED;
+                               return ERECYCLE;        
+                       }
+                       else {
+                               /* 
+                                * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor. 
+                                *
+                                * Replacing the descriptor here is fine because we looked up the item without
+                                * a vnode in hand before.  If a vnode existed, its identity must be attached to this
+                                * item.  We are not susceptible to the lookup fastpath issue at this point.
+                                */
+                               replace_desc(cp, descp);
+                       }
+               }
         }
+       
         /* Check if we found a matching vnode */
-       if (*vpp != NULL)
+       if (*vpp != NULL) {
                 return (0);
+       }
  
         /*
          * If this is a new cnode then initialize it.
          */
         if (ISSET(cp->c_hflag, H_ALLOC)) {
                 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
+#if HFS_COMPRESSION
+               cp->c_decmp = NULL;
+#endif
  
                 /* Make sure its still valid (ie exists on disk). */
-               if (!(flags & GNV_CREATE) &&
-                   !hfs_valid_cnode(hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid)) {
-                       hfs_chash_abort(cp);
-                       hfs_reclaim_cnode(cp);
-                       *vpp = NULL;
-                       return (ENOENT);
+               if (!(flags & GNV_CREATE)) {
+                       int error = 0;
+                       if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
+                               hfs_chash_abort(hfsmp, cp);
+                               hfs_reclaim_cnode(cp);
+                               *vpp = NULL;
+                               /* 
+                                * If we hit this case, that means that the entry was there in the catalog when
+                                * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
+                                * that we checked the catalog and the time we went to get a vnode/cnode for it,
+                                * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
+                                * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
+                                * an ENOENT.  To indicate to the caller that they should really double-check the
+                                * entry (it could have been renamed over and gotten a new fileid), we mark a bit
+                                * in the output flags.
+                                */
+                               if (error == ENOENT) {
+                                       *out_flags = GNV_CAT_DELETED;
+                                       return ENOENT;  
+                               }
+
+                               /*
+                                * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
+                                * this function as an argument because the catalog may have changed w.r.t hardlink
+                                * link counts and the firstlink field.  If that validation check fails, then let 
+                                * lookup re-drive itself to get valid/consistent data with the same failure condition below.
+                                */
+                               if (error == ERECYCLE) {
+                                       *out_flags = GNV_CAT_ATTRCHANGED;
+                                       return (ERECYCLE);
+                               }
+                       }
                 }
                 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
                 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
@@ -652,6 +1211,8 @@ hfs_getnewvnode(
                                 cp->c_dquot[i] = NODQUOT;
                 }
  #endif /* QUOTA */
+               /* Mark the output flag that we're vending a new cnode */
+               *out_flags |= GNV_NEW_CNODE;
         }
  
         if (vtype == VDIR) {
@@ -730,16 +1291,24 @@ hfs_getnewvnode(
                 vfsp.vnfs_cnp = cnp;
         }
         vfsp.vnfs_fsnode = cp;
+
+       /*
+        * Special Case HFS Standard VNOPs from HFS+, since
+        * HFS standard is readonly/deprecated as of 10.6 
+        */
+
  #if FIFO
-       if (vtype == VFIFO )
+       if (vtype == VFIFO ) 
                 vfsp.vnfs_vops = hfs_fifoop_p;
         else
  #endif
         if (vtype == VBLK || vtype == VCHR)
                 vfsp.vnfs_vops = hfs_specop_p;
-       else
+       else if (hfs_standard)
+               vfsp.vnfs_vops = hfs_std_vnodeop_p;
+       else 
                 vfsp.vnfs_vops = hfs_vnodeop_p;
-               
+
         if (vtype == VBLK || vtype == VCHR)
                 vfsp.vnfs_rdev = attrp->ca_rdev;
         else
@@ -751,7 +1320,7 @@ hfs_getnewvnode(
                 vfsp.vnfs_filesize = 0;
  
         vfsp.vnfs_flags = VNFS_ADDFSREF;
-       if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY))
+       if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
                 vfsp.vnfs_flags |= VNFS_NOCACHE;
  
         /* Tag system files */
@@ -777,11 +1346,11 @@ hfs_getnewvnode(
                  * occurred during the attachment, then cleanup the cnode.
                  */
                 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
-                       hfs_chash_abort(cp);
+                       hfs_chash_abort(hfsmp, cp);
                         hfs_reclaim_cnode(cp);
                 } 
                 else {
-                       hfs_chashwakeup(cp, H_ALLOC | H_ATTACH);
+                       hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
                         if ((flags & GNV_SKIPLOCK) == 0){
                                 hfs_unlock(cp);
                         }
@@ -800,11 +1369,16 @@ hfs_getnewvnode(
          * have the chance to process the resource fork.
          */
         if (VNODE_IS_RSRC(vp)) {
+               int err;
+               KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0);
+
                 /* Force VL_NEEDINACTIVE on this vnode */
-               vnode_ref(vp);
-               vnode_rele(vp);
+               err = vnode_ref(vp);
+               if (err == 0) {
+                       vnode_rele(vp);
+               }
         }
-       hfs_chashwakeup(cp, H_ALLOC | H_ATTACH);
+       hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
  
         /*
          * Stop tracking an active hot file.
@@ -812,6 +1386,13 @@ hfs_getnewvnode(
         if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
                 (void) hfs_removehotfile(vp);
         }
+       
+#if CONFIG_PROTECT
+       /* Initialize the cp data structures. The key should be in place now. */
+       if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
+               cp_entry_init(cp, mp);
+       }
+#endif
  
         *vpp = vp;
         return (0);
@@ -844,17 +1425,45 @@ hfs_reclaim_cnode(struct cnode *cp)
                 cp->c_desc.cd_namelen = 0;
                 vfs_removename(nameptr);
         }
-
+       
+       /*
+        * We only call this function if we are in hfs_vnop_reclaim and 
+        * attempting to reclaim a cnode with only one live fork.  Because the vnode
+        * went through reclaim, any future attempts to use this item will have to
+        * go through lookup again, which will need to create a new vnode.  Thus,
+        * destroying the locks below (while they were still held during our parent 
+        * function hfs_vnop_reclaim) is safe.
+        */     
+       
         lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
         lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
+#if HFS_COMPRESSION
+       if (cp->c_decmp) {
+               decmpfs_cnode_destroy(cp->c_decmp);
+               FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
+       }
+#endif
+#if CONFIG_PROTECT
+       cp_entry_destroy(&cp->c_cpentry);
+#endif
+       
+       
         bzero(cp, sizeof(struct cnode));
         FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
  }
  
  
-__private_extern__
+/*
+ * hfs_valid_cnode
+ *
+ * This function is used to validate data that is stored in-core against what is contained
+ * in the catalog.  Common uses include validating that the parent-child relationship still exist
+ * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
+ * the point of the check.
+ */
  int
-hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid)
+hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, 
+               cnid_t cnid, struct cat_attr *cattr, int *error)
  {
         struct cat_attr attr;
         struct cat_desc cndesc;
@@ -862,34 +1471,180 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
         int lockflags;
  
         /* System files are always valid */
-       if (cnid < kHFSFirstUserCatalogNodeID)
+       if (cnid < kHFSFirstUserCatalogNodeID) {
+               *error = 0;
                 return (1);
+       }
  
         /* XXX optimization:  check write count in dvp */
  
         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
  
         if (dvp && cnp) {
+               int lookup = 0;
+               struct cat_fork fork;
                 bzero(&cndesc, sizeof(cndesc));
                 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
                 cndesc.cd_namelen = cnp->cn_namelen;
                 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
                 cndesc.cd_hint = VTOC(dvp)->c_childhint;
  
-               if ((cat_lookup(hfsmp, &cndesc, 0, NULL, &attr, NULL, NULL) == 0) &&
-                   (cnid == attr.ca_fileid)) {
+               /* 
+                * We have to be careful when calling cat_lookup.  The result argument
+                * 'attr' may get different results based on whether or not you ask
+                * for the filefork to be supplied as output.  This is because cat_lookupbykey
+                * will attempt to do basic validation/smoke tests against the resident
+                * extents if there are no overflow extent records, but it needs someplace
+                * in memory to store the on-disk fork structures.
+                *
+                * Since hfs_lookup calls cat_lookup with a filefork argument, we should
+                * do the same here, to verify that block count differences are not
+                * due to calling the function with different styles.  cat_lookupbykey
+                * will request the volume be fsck'd if there is true on-disk corruption
+                * where the number of blocks does not match the number generated by 
+                * summing the number of blocks in the resident extents.
+                */
+               
+               lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
+
+               if ((lookup == 0) && (cnid == attr.ca_fileid)) {
                         stillvalid = 1;
+                       *error = 0;
+               }
+               else {
+                       *error = ENOENT;
+               }
+       
+               /*
+                * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation 
+                * race.  Specifically, if there is no vnode/cnode pair for the directory entry 
+                * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
+                * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
+                * changing in between the time we do the cat_lookup there and the time we re-grab the 
+                * catalog lock above to do another cat_lookup. 
+                * 
+                * However, we need to check more than just the CNID and parent-child name relationships above.  
+                * Hardlinks can suffer the same race in the following scenario:  Suppose we do a 
+                * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have 
+                * the cat_attr in hand (passed in above).  But in between then and now, the vnode was 
+                * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get 
+                * a chance to do anything.  This is possible if there are a lot of threads thrashing around
+                * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
+                * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is 
+                * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
+                * already exists, as it does in the case of rename and delete. 
+                */ 
+               if (stillvalid && cattr != NULL) {
+                       if (cattr->ca_linkcount != attr.ca_linkcount) {
+                               stillvalid = 0;
+                               *error = ERECYCLE;
+                               goto notvalid;
+                       }
+                       
+                       if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
+                               stillvalid = 0;
+                               *error = ERECYCLE;
+                               goto notvalid;
+                       }
+
+                       if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
+                               stillvalid = 0;
+                               *error = ERECYCLE;
+                               goto notvalid;
+                       }
+
+                       if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
+                               stillvalid = 0;
+                               *error = ERECYCLE;
+                               goto notvalid;
+                       }
                 }
         } else {
-               if (cat_idlookup(hfsmp, cnid, 0, NULL, NULL, NULL) == 0) {
+               if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
                         stillvalid = 1;
+                       *error = 0;
+               }
+               else {
+                       *error = ENOENT;
                 }
         }
+notvalid:
         hfs_systemfile_unlock(hfsmp, lockflags);
  
         return (stillvalid);
  }
  
+
+/*
+ * Record the date/time at which a directory entry was added to its
+ * containing directory, as required by HI and Finder.  The value is
+ * kept in the extended Finder Info of the item in question.
+ *
+ * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
+ * We must ignore user attempts to set this part of the finderinfo, and
+ * so we need to save a local copy of the date added, write in the user
+ * finderinfo, then stuff the value back in.
+ *
+ * attrp:     catalog attributes whose finder info and record flags are updated
+ * dateadded: timestamp in host byte order.  This is a Unix-epoch timestamp,
+ *            not a HFS/Traditional Mac timestamp.
+ *
+ * Nothing is written unless ca_mode describes a regular file or a directory.
+ */
+void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
+       /* The extended info overlay begins 16 bytes into the FinderInfo. */
+       u_int8_t *extbytes = ((u_int8_t *)attrp->ca_finderinfo) + 16;
+
+       /* Finder info is defined as big endian, so swap once up front. */
+       u_int32_t be_dateadded = OSSwapHostToBigInt32(dateadded);
+
+       if (S_ISREG(attrp->ca_mode)) {
+               ((struct FndrExtendedFileInfo *)extbytes)->date_added = be_dateadded;
+       }
+       else if (S_ISDIR(attrp->ca_mode)) {
+               ((struct FndrExtendedDirInfo *)extbytes)->date_added = be_dateadded;
+       }
+       else {
+               /* Neither a regular file nor a directory: leave everything untouched. */
+               return;
+       }
+
+       /* Remember that a date-added value is now present in the record. */
+       attrp->ca_recflags |= kHFSHasDateAddedMask;
+}
+
+
+/*
+ * Fetch the date-added timestamp kept in the cnode's extended Finder Info
+ * and return it in host byte order.
+ *
+ * Returns 0 when kHFSHasDateAddedMask is not set in the record flags, or
+ * when the cnode is neither a regular file nor a directory.
+ */
+u_int32_t hfs_get_dateadded (struct cnode *cp) {
+       u_int8_t *extbytes;
+
+       if (!(cp->c_attr.ca_recflags & kHFSHasDateAddedMask)) {
+               /* Date added was never set. */
+               return 0;
+       }
+
+       /* The extended info overlay begins 16 bytes into the FinderInfo. */
+       extbytes = ((u_int8_t *)cp->c_finderinfo) + 16;
+
+       /*
+        * The stored value is big endian; convert it to host order
+        * before handing it back.
+        */
+       if (S_ISREG(cp->c_attr.ca_mode)) {
+               return OSSwapBigToHostInt32 (((struct FndrExtendedFileInfo *)extbytes)->date_added);
+       }
+       if (S_ISDIR(cp->c_attr.ca_mode)) {
+               return OSSwapBigToHostInt32 (((struct FndrExtendedDirInfo *)extbytes)->date_added);
+       }
+
+       return 0;
+}
+
  /*
   * Touch cnode times based on c_touch_xxx flags
   *
@@ -897,34 +1652,41 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
   *
   * This will also update the volume modify time
   */
-__private_extern__
  void
  hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
  {
+       vfs_context_t ctx;
         /* don't modify times if volume is read-only */
         if (hfsmp->hfs_flags & HFS_READ_ONLY) {
                 cp->c_touch_acctime = FALSE;
                 cp->c_touch_chgtime = FALSE;
                 cp->c_touch_modtime = FALSE;
+               return;
         }
         else if (hfsmp->hfs_flags & HFS_STANDARD) {
         /* HFS Standard doesn't support access times */
                 cp->c_touch_acctime = FALSE;
         }
  
+       ctx = vfs_context_current();
         /*
          * Skip access time updates if:
          *      . MNT_NOATIME is set
          *      . a file system freeze is in progress
          *      . a file system resize is in progress
+        *      . the vnode associated with this cnode is marked for rapid aging
          */
         if (cp->c_touch_acctime) {
                 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
                     (hfsmp->hfs_freezing_proc != NULL) ||
-                   (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS))
+                   (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
+                   (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
+                               
                         cp->c_touch_acctime = FALSE;
+               }
         }
-       if (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+       if (cp->c_touch_acctime || cp->c_touch_chgtime || 
+               cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
                 struct timeval tv;
                 int touchvol = 0;
  
@@ -964,6 +1726,14 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
                         touchvol = 1;
                 }
  
+               if (cp->c_flag & C_NEEDS_DATEADDED) {
+                       hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
+                       cp->c_flag |= C_MODIFIED;
+                       /* untwiddle the bit */
+                       cp->c_flag &= ~C_NEEDS_DATEADDED;
+                       touchvol = 1;
+               }
+
                 /* Touch the volume modtime if needed */
                 if (touchvol) {
                         MarkVCBDirty(hfsmp);
@@ -975,7 +1745,6 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
  /*
   * Lock a cnode.
   */
-__private_extern__
  int
  hfs_lock(struct cnode *cp, enum hfslocktype locktype)
  {
@@ -1058,7 +1827,6 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype)
  /*
   * Lock a pair of cnodes.
   */
-__private_extern__
  int
  hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype)
  {
@@ -1118,10 +1886,9 @@ hfs_isordered(struct cnode *cp1, struct cnode *cp2)
   *   - only one lock taken per cnode (dup cnodes are skipped)
   *   - some of the cnode pointers may be null
   */
-__private_extern__
  int
  hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
-             struct cnode *cp4, enum hfslocktype locktype)
+             struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode)
  {
         struct cnode * a[3];
         struct cnode * b[3];
@@ -1129,6 +1896,9 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
         struct cnode * tmp;
         int i, j, k;
         int error;
+       if (error_cnode) {
+               *error_cnode = NULL;
+       }
  
         if (hfs_isordered(cp1, cp2)) {
                 a[0] = cp1; a[1] = cp2;
@@ -1159,6 +1929,10 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
         for (i = 0; i < k; ++i) {
                 if (list[i])
                         if ((error = hfs_lock(list[i], locktype))) {
+                               /* Only stuff error_cnode if requested */
+                               if (error_cnode) {
+                                       *error_cnode = list[i];
+                               }
                                 /* Drop any locks we acquired. */
                                 while (--i >= 0) {
                                         if (list[i])
@@ -1174,7 +1948,6 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
  /*
   * Unlock a cnode.
   */
-__private_extern__
  void
  hfs_unlock(struct cnode *cp)
  {
@@ -1228,7 +2001,6 @@ hfs_unlock(struct cnode *cp)
  /*
   * Unlock a pair of cnodes.
   */
-__private_extern__
  void
  hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
  {
@@ -1240,7 +2012,6 @@ hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
  /*
   * Unlock a group of cnodes.
   */
-__private_extern__
  void
  hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
  {
@@ -1287,34 +2058,119 @@ skip2:
   *
   * The process doing a truncation must take the lock
   * exclusive. The read/write processes can take it
- * non-exclusive.
+ * shared.  The locktype argument is the same as supplied to
+ * hfs_lock.
   */
-__private_extern__
  void
-hfs_lock_truncate(struct cnode *cp, int exclusive)
+hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
  {
-#ifdef HFS_CHECK_LOCK_ORDER
-       if (cp->c_lockowner == current_thread())
-               panic("hfs_lock_truncate: cnode %p locked!", cp);
-#endif /* HFS_CHECK_LOCK_ORDER */
+       /* Identity of the caller, compared against the recorded exclusive owner. */
+       void * thread = current_thread();
  
-       if (exclusive)
-               lck_rw_lock_exclusive(&cp->c_truncatelock);
-       else
+       if (cp->c_truncatelockowner == thread) {
+               /* 
+                * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
+                * 
+                * This is needed on the hfs_vnop_pagein path where we need to ensure
+                * the file does not change sizes while we are paging in.  However,
+                * we may already hold the lock exclusive due to another 
+                * VNOP from earlier in the call stack.  So if we already hold 
+                * the truncate lock exclusive, allow it to proceed, but ONLY if 
+                * it's in the recursive case.
+                */
+               if (locktype != HFS_RECURSE_TRUNCLOCK) {
+                       panic("hfs_lock_truncate: cnode %p locked!", cp);
+               }
+               /* Recursive case: nothing is acquired and the owner field is left alone. */
+       }
+       /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
+       else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
                 lck_rw_lock_shared(&cp->c_truncatelock);
+               /* Mark the lock as held shared; no individual thread is recorded. */
+               cp->c_truncatelockowner = HFS_SHARED_OWNER;
+       }
+       else { /* must be an HFS_EXCLUSIVE_LOCK */
+               lck_rw_lock_exclusive(&cp->c_truncatelock);
+               /*
+                * Record the owning thread.  The check at the top of this function
+                * and the one in hfs_unlock_truncate both compare against this field.
+                */
+               cp->c_truncatelockowner = thread;
+       }
  }
  
-__private_extern__
-void
-hfs_unlock_truncate(struct cnode *cp, int exclusive)
-{
-    if (exclusive) {
-       lck_rw_unlock_exclusive(&cp->c_truncatelock);
-    } else {
-       lck_rw_unlock_shared(&cp->c_truncatelock);
-    }
+
+/*
+ * Attempt to get the truncate lock.  If it cannot be acquired, error out.
+ * This function is needed in the degenerate hfs_vnop_pagein during force unmount
+ * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
+ * temporarily need to disable V2 semantics.  
+ *
+ * The locktype argument is interpreted the same way as in hfs_lock_truncate.
+ *
+ * Returns nonzero if this call acquired the lock, and 0 otherwise.  Note that
+ * 0 is also returned when locktype is HFS_RECURSE_TRUNCLOCK and the current
+ * thread is already recorded as the owner, because no new acquisition takes
+ * place in that case.
+ */
+int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
+       void * thread = current_thread();
+       boolean_t didlock = false;
+
+       if (cp->c_truncatelockowner == thread) {
+               /* 
+                * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
+                * 
+                * This is needed on the hfs_vnop_pagein path where we need to ensure
+                * the file does not change sizes while we are paging in.  However,
+                * we may already hold the lock exclusive due to another 
+                * VNOP from earlier in the call stack.  So if we already hold 
+                * the truncate lock exclusive, allow it to proceed, but ONLY if 
+                * it's in the recursive case.
+                */
+               if (locktype != HFS_RECURSE_TRUNCLOCK) {
+                       /* Name this function (not hfs_lock_truncate) so panic logs point at the right caller path. */
+                       panic("hfs_try_trunclock: cnode %p locked!", cp);
+               }
+       }
+       /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
+       else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
+               didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
+               if (didlock) {
+                       /* Only record the shared marker when the try-lock actually succeeded. */
+                       cp->c_truncatelockowner = HFS_SHARED_OWNER;
+               }
+       }
+       else { /* must be an HFS_EXCLUSIVE_LOCK */
+               didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
+               if (didlock) {
+                       /* Only claim ownership when the try-lock actually succeeded. */
+                       cp->c_truncatelockowner = thread;
+               }
+       }
+       
+       return didlock;
  }
  
  
+/*
+ * Release the truncate lock, which guards against file size changes.
+ *
+ * been_recursed: nonzero when the caller may have gone through
+ *                HFS_RECURSE_TRUNCLOCK while this thread already owned
+ *                the lock.  In that situation the lock stays held and
+ *                this function returns without unlocking anything.
+ *
+ * Whether the lock was taken exclusive or shared is inferred from
+ * c_truncatelockowner: an exclusive acquisition records the owning
+ * thread, so any other value is treated as a shared hold.
+ */
+void
+hfs_unlock_truncate(struct cnode *cp, int been_recursed)
+{
+       void *self = current_thread();
  
+       if (cp->c_truncatelockowner != self) {
+               /*
+                * HFS_SHARED_LOCK: this thread is not the recorded owner.  This
+                * also covers HFS_RECURSE_TRUNCLOCK when it ended up taking a
+                * shared lock because the lock was not already ours.
+                */
+               lck_rw_unlock_shared(&cp->c_truncatelock);
+               return;
+       }
  
+       if (been_recursed) {
+               /*
+                * We are the recorded owner and the caller recursed on top of
+                * that hold, so there is nothing of its own to release here.
+                */
+               return;
+       }
+
+       /* HFS_EXCLUSIVE_LOCK: clear ownership, then drop the lock. */
+       cp->c_truncatelockowner = NULL;
+       lck_rw_unlock_exclusive(&cp->c_truncatelock);
+}