]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfs_vnops.c
xnu-3247.10.11.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vnops.c
index 414d6de78975f7cc8810d272ff2e5b1ecbb6fb1d..dac4b088f814fb1ae4b13f377c2df5bfcd647994 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -26,6 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <stdbool.h>
 #include <sys/systm.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -47,7 +48,6 @@
 #include <sys/kauth.h>
 #include <sys/uio_internal.h>
 #include <sys/fsctl.h>
-#include <sys/cprotect.h>
 #include <sys/xattr.h>
 #include <string.h>
 #include <sys/fsevents.h>
@@ -60,6 +60,7 @@
 
 #include <sys/kdebug.h>
 #include <sys/sysctl.h>
+#include <stdbool.h>
 
 #include "hfs.h"
 #include "hfs_catalog.h"
@@ -68,6 +69,9 @@
 #include "hfs_mount.h"
 #include "hfs_quota.h"
 #include "hfs_endian.h"
+#include "hfs_kdebug.h"
+#include "hfs_cprotect.h"
+
 
 #include "hfscommon/headers/BTreesInternal.h"
 #include "hfscommon/headers/FileMgrInternal.h"
@@ -95,9 +99,19 @@ int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
 /* Used here and in cnode teardown -- for symlinks */
 int hfs_removefile_callback(struct buf *bp, void *hfsmp);
 
-int hfs_movedata (struct vnode *, struct vnode*);
-static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, 
-                                                 struct filefork *dstfork, struct cnode *dst);
+enum {
+       HFS_MOVE_DATA_INCLUDE_RSRC              = 1,
+};
+typedef uint32_t hfs_move_data_options_t;
+
+static int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, 
+                                                hfs_move_data_options_t options);
+static int hfs_move_fork(filefork_t *srcfork, cnode_t *src, 
+                                                filefork_t *dstfork, cnode_t *dst);
+
+#if HFS_COMPRESSION
+static int hfs_move_compressed(cnode_t *from_vp, cnode_t *to_vp);
+#endif
 
 decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
 
@@ -125,7 +139,6 @@ int hfs_vnop_symlink(struct vnop_symlink_args*);
 int hfs_vnop_setattr(struct vnop_setattr_args*);
 int hfs_vnop_readlink(struct vnop_readlink_args *);
 int hfs_vnop_pathconf(struct vnop_pathconf_args *);
-int hfs_vnop_whiteout(struct vnop_whiteout_args *);
 int hfs_vnop_mmap(struct vnop_mmap_args *ap);
 int hfsspec_read(struct vnop_read_args *);
 int hfsspec_write(struct vnop_write_args *);
@@ -169,42 +182,20 @@ hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp)
 int
 hfs_vnop_create(struct vnop_create_args *ap)
 {
-       int error;
-
-again:
-       error = hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context);
-
        /*
-        * We speculatively skipped the original lookup of the leaf
-        * for CREATE.  Since it exists, go get it as long as they
-        * didn't want an exclusive create.
+        * We leave handling of certain race conditions here to the caller
+        * which will have a better understanding of the semantics it
+        * requires.  For example, if it turns out that the file exists,
+        * it would be wrong of us to return a reference to the existing
+        * file because the caller might not want that and it would be
+        * misleading to suggest the file had been created when it hadn't
+        * been.  Note that our NFS server code does not set the
+        * VA_EXCLUSIVE flag so you cannot assume that callers don't want
+        * EEXIST errors if it's not set.  The common case, where users
+        * are calling open with the O_CREAT mode, is handled in VFS; when
+        * we return EEXIST, it will loop and do the look-up again.
         */
-       if ((error == EEXIST) && !(ap->a_vap->va_vaflags & VA_EXCLUSIVE)) {
-               struct vnop_lookup_args args;
-
-               args.a_desc = &vnop_lookup_desc;
-               args.a_dvp = ap->a_dvp;
-               args.a_vpp = ap->a_vpp;
-               args.a_cnp = ap->a_cnp;
-               args.a_context = ap->a_context;
-               args.a_cnp->cn_nameiop = LOOKUP;
-               error = hfs_vnop_lookup(&args);
-               /*
-                * We can also race with remove for this file.
-                */
-               if (error == ENOENT) {
-                       goto again;
-               }
-
-               /* Make sure it was file. */
-               if ((error == 0) && !vnode_isreg(*args.a_vpp)) {
-                       vnode_put(*args.a_vpp);
-                       *args.a_vpp = NULLVP;
-                       error = EEXIST;
-               }
-               args.a_cnp->cn_nameiop = CREATE;
-       }
-       return (error);
+       return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context);
 }
 
 /*
@@ -482,7 +473,7 @@ get_uthread_doc_tombstone(void)
 // where we have the necessary info.
 // 
 static void
-clear_tombstone_docid(struct  doc_tombstone *ut, struct hfsmount *hfsmp, struct cnode *dst_cnode)
+clear_tombstone_docid(struct  doc_tombstone *ut, __unused struct hfsmount *hfsmp, struct cnode *dst_cnode)
 {
        uint32_t old_id = ut->t_lastop_document_id;
 
@@ -507,7 +498,7 @@ clear_tombstone_docid(struct  doc_tombstone *ut, struct hfsmount *hfsmp, struct
                // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid);
                ofip->document_id = 0;
                ocp->c_bsdflags &= ~UF_TRACKED;
-               ocp->c_flag |= C_MODIFIED | C_FORCEUPDATE;   // mark it dirty
+               ocp->c_flag |= C_MODIFIED;
                /* cat_update(hfsmp, &ocp->c_desc, &ocp->c_attr, NULL, NULL); */
 
        }
@@ -699,6 +690,38 @@ hfs_vnop_open(struct vnop_open_args *ap)
        if (hfs_is_journal_file(hfsmp, cp))
                return (EPERM);
 
+       bool have_lock = false;
+
+#if CONFIG_PROTECT
+       if (ISSET(ap->a_mode, FENCRYPTED) && cp->c_cpentry && vnode_isreg(vp)) {
+               bool have_trunc_lock = false;
+
+
+               if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+                       if (have_trunc_lock)
+                               hfs_unlock_truncate(cp, 0);
+                       return error;
+               }
+
+               have_lock = true;
+
+               if (cp->c_cpentry->cp_raw_open_count + 1
+                       < cp->c_cpentry->cp_raw_open_count) {
+                       // Overflow; too many raw opens on this file
+                       hfs_unlock(cp);
+                       if (have_trunc_lock)
+                               hfs_unlock_truncate(cp, 0);
+                       return ENFILE;
+               }
+
+
+               if (have_trunc_lock)
+                       hfs_unlock_truncate(cp, 0);
+
+               ++cp->c_cpentry->cp_raw_open_count;
+       }
+#endif
+
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
            (hfsmp->jnl == NULL) ||
 #if NAMEDSTREAMS
@@ -706,10 +729,16 @@ hfs_vnop_open(struct vnop_open_args *ap)
 #else
            !vnode_isreg(vp) || vnode_isinuse(vp, 0)) {
 #endif
+
+#if CONFIG_PROTECT
+               if (have_lock)
+                       hfs_unlock(cp);
+#endif
+
                return (0);
        }
 
-       if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
+       if (!have_lock && (error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
                return (error);
 
 #if QUOTA
@@ -784,6 +813,13 @@ hfs_vnop_close(ap)
        cp = VTOC(vp);
        hfsmp = VTOHFS(vp);
 
+#if CONFIG_PROTECT
+       if (cp->c_cpentry && ISSET(ap->a_fflag, FENCRYPTED) && vnode_isreg(vp)) {
+               assert(cp->c_cpentry->cp_raw_open_count > 0);
+               --cp->c_cpentry->cp_raw_open_count;
+       }
+#endif
+
        /* 
         * If the rsrc fork is a named stream, it can cause the data fork to
         * stay around, preventing de-allocation of these blocks. 
@@ -821,7 +857,8 @@ hfs_vnop_close(ap)
                                        // now we can truncate the file, if necessary
                                        blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize);
                                        if (blks < VTOF(vp)->ff_blocks){
-                                               (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, 0, ap->a_context);
+                                               (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY,
+                                                                                       0, ap->a_context);
                                        }
                                }
                        }
@@ -830,10 +867,9 @@ hfs_vnop_close(ap)
 
 
        // if we froze the fs and we're exiting, then "thaw" the fs 
-       if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
-           hfsmp->hfs_freezing_proc = NULL;
-           hfs_unlock_global (hfsmp);
-               lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+       if (hfsmp->hfs_freeze_state == HFS_FROZEN
+           && hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
+               hfs_thaw(hfsmp, p);
        }
 
        busy = vnode_isinuse(vp, 1);
@@ -859,6 +895,14 @@ hfs_vnop_close(ap)
        return (0);
 }
 
+static bool hfs_should_generate_document_id(hfsmount_t *hfsmp, cnode_t *cp)
+{
+       return (!ISSET(hfsmp->hfs_flags, HFS_READ_ONLY)
+                       && ISSET(cp->c_bsdflags, UF_TRACKED)
+                       && cp->c_desc.cd_cnid != kHFSRootFolderID
+                       && (S_ISDIR(cp->c_mode) || S_ISREG(cp->c_mode) || S_ISLNK(cp->c_mode)));
+}
+
 /*
  * Get basic attributes.
  */
@@ -936,6 +980,65 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 
        hfsmp = VTOHFS(vp);
        v_type = vnode_vtype(vp);
+
+       if (VATTR_IS_ACTIVE(vap, va_document_id)) {
+               uint32_t document_id;
+
+               if (cp->c_desc.cd_cnid == kHFSRootFolderID)
+                       document_id = kHFSRootFolderID;
+               else {
+                       /*
+                        * This is safe without a lock because we're just reading
+                        * a 32 bit aligned integer which should be atomic on all
+                        * platforms we support.
+                        */
+                       document_id = hfs_get_document_id(cp);
+
+                       if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) {
+                               uint32_t new_document_id;
+
+                               error = hfs_generate_document_id(hfsmp, &new_document_id);
+                               if (error)
+                                       return error;
+
+                               error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+                               if (error)
+                                       return error;
+
+                               bool want_docid_fsevent = false;
+
+                               // Need to check again now that we have the lock
+                               document_id = hfs_get_document_id(cp);
+                               if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) {
+                                       cp->c_attr.ca_finderextendeddirinfo.document_id = document_id = new_document_id;
+                                       want_docid_fsevent = true;
+                                       SET(cp->c_flag, C_MODIFIED);
+                               }
+
+                               hfs_unlock(cp);
+
+                               if (want_docid_fsevent) {
+#if CONFIG_FSE
+                                       add_fsevent(FSE_DOCID_CHANGED, ap->a_context,
+                                                               FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
+                                                               FSE_ARG_INO,   (ino64_t)0,             // src inode #
+                                                               FSE_ARG_INO,   (ino64_t)cp->c_fileid,  // dst inode #
+                                                               FSE_ARG_INT32, document_id,
+                                                               FSE_ARG_DONE);
+
+                                       if (need_fsevent(FSE_STAT_CHANGED, vp)) {
+                                               add_fsevent(FSE_STAT_CHANGED, ap->a_context, 
+                                                                       FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+                                       }
+#endif
+                               }
+                       }
+               }
+
+               vap->va_document_id = document_id;
+               VATTR_SET_SUPPORTED(vap, va_document_id);
+       }
+
        /*
         * If time attributes are requested and we have cnode times
         * that require updating, then acquire an exclusive lock on
@@ -947,10 +1050,13 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
                        return (error);
                hfs_touchtimes(hfsmp, cp);
-       }
-       else {
-               if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT)))
-                       return (error);
+
+               // downgrade to a shared lock since that's all we need from here on out
+               cp->c_lockowner = HFS_SHARED_OWNER;
+               lck_rw_lock_exclusive_to_shared(&cp->c_rwlock);
+
+       } else if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) {
+               return (error);
        }
 
        if (v_type == VDIR) {
@@ -1065,7 +1171,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                                
                                if (cp->c_blocks - VTOF(vp)->ff_blocks) {
                                        /* We deal with rsrc fork vnode iocount at the end of the function */
-                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
+                                       error = hfs_vgetrsrc(hfsmp, vp, &rvp);
                                        if (error) {
                                                /*
                                                 * Note that we call hfs_vgetrsrc with error_on_unlinked
@@ -1111,19 +1217,9 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                        VATTR_SET_SUPPORTED(vap, va_acl);
                }
        }
-       if (VATTR_IS_ACTIVE(vap, va_access_time)) {
-               /* Access times are lazily updated, get current time if needed */
-               if (cp->c_touch_acctime) {
-                       struct timeval tv;
-       
-                       microtime(&tv);
-                       vap->va_access_time.tv_sec = tv.tv_sec;
-               } else {
-                       vap->va_access_time.tv_sec = cp->c_atime;
-               }
-               vap->va_access_time.tv_nsec = 0;
-               VATTR_SET_SUPPORTED(vap, va_access_time);
-       }
+
+       vap->va_access_time.tv_sec = cp->c_atime;
+       vap->va_access_time.tv_nsec = 0;
        vap->va_create_time.tv_sec = cp->c_itime;
        vap->va_create_time.tv_nsec = 0;
        vap->va_modify_time.tv_sec = cp->c_mtime;
@@ -1172,7 +1268,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
         */
        if (cp->c_flag & C_HARDLINK) {
                vap->va_linkid = (u_int64_t)hfs_currentcnid(cp);
-               vap->va_parentid = (u_int64_t)hfs_currentparent(cp);
+               vap->va_parentid = (u_int64_t)hfs_currentparent(cp, /* have_lock: */ true);
        } else {
                vap->va_linkid = (u_int64_t)cp->c_cnid;
                vap->va_parentid = (u_int64_t)cp->c_parentcnid;
@@ -1195,7 +1291,6 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                        }
                } else
                        vap->va_data_size = data_size;
-//             vap->va_supported |= VNODE_ATTR_va_data_size;
                VATTR_SET_SUPPORTED(vap, va_data_size);
        }
 #else
@@ -1203,30 +1298,32 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
        vap->va_supported |= VNODE_ATTR_va_data_size;
 #endif
 
-       if (VATTR_IS_ACTIVE(vap, va_gen)) {
-               if (UBCINFOEXISTS(vp) && (vp->v_ubcinfo->ui_flags & UI_ISMAPPED)) {
-                       /* While file is mmapped the generation count is invalid. 
-                        * However, bump the value so that the write-gen counter 
-                        * will be different once the file is unmapped (since,
-                        * when unmapped the pageouts may not yet have happened)
-                        */
-                       if (vp->v_ubcinfo->ui_flags & UI_MAPPEDWRITE) {
-                               hfs_incr_gencount (cp);
-                       }
-                       vap->va_gen = 0;
+#if CONFIG_PROTECT
+       if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
+               vap->va_dataprotect_class = cp->c_cpentry ? CP_CLASS(cp->c_cpentry->cp_pclass) : 0;
+               VATTR_SET_SUPPORTED(vap, va_dataprotect_class);
+       }       
+#endif
+       if (VATTR_IS_ACTIVE(vap, va_write_gencount)) {
+               if (ubc_is_mapped_writable(vp)) {
+                       /*
+                        * Return 0 to the caller to indicate the file may be
+                        * changing.  There is no need for us to increment the
+                        * generation counter here because it gets done as part of
+                        * page-out and also when the file is unmapped (to account
+                        * for changes we might not have seen).
+                        */
+                       vap->va_write_gencount = 0;
                } else {
-                       vap->va_gen = hfs_get_gencount(cp);
+                       vap->va_write_gencount = hfs_get_gencount(cp);
                }
-                       
-               VATTR_SET_SUPPORTED(vap, va_gen);
-       }
-       if (VATTR_IS_ACTIVE(vap, va_document_id)) {
-               vap->va_document_id = hfs_get_document_id(cp);
-               VATTR_SET_SUPPORTED(vap, va_document_id);
+
+               VATTR_SET_SUPPORTED(vap, va_write_gencount);
        }
 
        /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */
-       vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time |
+       vap->va_supported |= VNODE_ATTR_va_access_time |
+                                                VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time |
                             VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time |
                             VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid |
                             VNODE_ATTR_va_gid | VNODE_ATTR_va_mode |
@@ -1359,8 +1456,15 @@ hfs_vnop_setattr(ap)
        }
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
-               return (error);
+       /*
+        * All metadata changes should be allowed except a size-changing setattr, which
+        * has effects on file content and requires calling into cp_handle_vnop
+        * to have content protection check.
+        */
+       if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+               if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
+                       return (error);
+               }
        }
 #endif /* CONFIG_PROTECT */
 
@@ -1428,29 +1532,19 @@ hfs_vnop_setattr(ap)
                }
 #endif
 
-               /* Take truncate lock before taking cnode lock. */
+               // Take truncate lock
                hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
-               
-               /* Perform the ubc_setsize before taking the cnode lock. */
-               ubc_setsize(vp, vap->va_data_size);
 
-               if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
-                       hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
-#if HFS_COMPRESSION
-                       decmpfs_unlock_compressed_data(dp, 1);
-#endif
-                       return (error);
-               }
-               cp = VTOC(vp);
+               // hfs_truncate will deal with the cnode lock
+               error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 
+                                                        0, ap->a_context);
 
-               error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context);
-
-               hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+               hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
 #if HFS_COMPRESSION
                decmpfs_unlock_compressed_data(dp, 1);
 #endif
                if (error)
-                       goto out;
+                       return error;
        }
        if (cp == NULL) {
                if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
@@ -1560,6 +1654,7 @@ hfs_vnop_setattr(ap)
                }
 
                cp->c_bsdflags = vap->va_flags;
+               cp->c_flag |= C_MODIFIED;
                cp->c_touch_chgtime = TRUE;
 
                
@@ -1600,6 +1695,8 @@ hfs_vnop_setattr(ap)
                        cp->c_touch_modtime = FALSE;
                        cp->c_touch_chgtime = TRUE;
 
+                       hfs_clear_might_be_dirty_flag(cp);
+
                        /*
                         * The utimes system call can reset the modification
                         * time but it doesn't know about HFS create times.
@@ -1608,13 +1705,14 @@ hfs_vnop_setattr(ap)
                         */
                        if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) &&
                            (cp->c_cnid != kHFSRootFolderID) &&
+                               !VATTR_IS_ACTIVE(vap, va_create_time) &&
                            (cp->c_mtime < cp->c_itime)) {
                                cp->c_itime = cp->c_mtime;
                        }
                }
                if (VATTR_IS_ACTIVE(vap, va_backup_time))
                        cp->c_btime = vap->va_backup_time.tv_sec;
-               cp->c_flag |= C_MODIFIED;
+               cp->c_flag |= C_MINOR_MOD;
        }
        
        /*
@@ -1623,10 +1721,11 @@ hfs_vnop_setattr(ap)
        VATTR_SET_SUPPORTED(vap, va_encoding);
        if (VATTR_IS_ACTIVE(vap, va_encoding)) {
                cp->c_encoding = vap->va_encoding;
+               cp->c_flag |= C_MODIFIED;
                hfs_setencodingbits(hfsmp, cp->c_encoding);
        }
 
-       if ((error = hfs_update(vp, TRUE)) != 0)
+       if ((error = hfs_update(vp, 0)) != 0)
                goto out;
 out:
        if (cp) {
@@ -1687,8 +1786,12 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc
                return (0);
        };
 #endif
-       cp->c_mode &= ~ALLPERMS;
-       cp->c_mode |= (mode & ALLPERMS);
+
+       mode_t new_mode = (cp->c_mode & ~ALLPERMS) | (mode & ALLPERMS);
+       if (new_mode != cp->c_mode) {
+               cp->c_mode = new_mode;
+               cp->c_flag |= C_MINOR_MOD;
+       }
        cp->c_touch_chgtime = TRUE;
        return (0);
 }
@@ -1787,6 +1890,13 @@ hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
 
        ogid = cp->c_gid;
        ouid = cp->c_uid;
+
+       if (ouid == uid && ogid == gid) {
+               // No change, just set change time
+               cp->c_touch_chgtime = TRUE;
+               return 0;
+       }
+
 #if QUOTA
        if ((error = hfs_getinoquota(cp)))
                return (error);
@@ -1858,6 +1968,11 @@ good:
                panic("hfs_chown: lost quota");
 #endif /* QUOTA */
 
+       /*
+        * Without quotas, we could probably make this a minor
+        * modification.
+        */
+       cp->c_flag |= C_MODIFIED;
 
        /*
          According to the SUSv3 Standard, chown() shall mark
@@ -1868,6 +1983,47 @@ good:
        return (0);
 }
 
+#if HFS_COMPRESSION
+/*
+ * Flush the resource fork if it exists.  vp is the data fork and has
+ * an iocount.
+ */
+static int hfs_flush_rsrc(vnode_t vp, vfs_context_t ctx)
+{
+       cnode_t *cp = VTOC(vp);
+
+       hfs_lock(cp, HFS_SHARED_LOCK, 0);
+
+       vnode_t rvp = cp->c_rsrc_vp;
+
+       if (!rvp) {
+               hfs_unlock(cp);
+               return 0;
+       }
+
+       int vid = vnode_vid(rvp);
+
+       hfs_unlock(cp);
+
+       int error = vnode_getwithvid(rvp, vid);
+
+       if (error)
+               return error == ENOENT ? 0 : error;
+
+       hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0);
+       hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+       hfs_filedone(rvp, ctx, HFS_FILE_DONE_NO_SYNC);
+       hfs_unlock(cp);
+       hfs_unlock_truncate(cp, 0);
+
+       error = ubc_msync(rvp, 0, ubc_getsize(rvp), NULL,
+                                         UBC_PUSHALL | UBC_SYNC);
+
+       vnode_put(rvp);
+
+       return error;
+}
+#endif // HFS_COMPRESSION
 
 /*
  * hfs_vnop_exchange:
@@ -1917,6 +2073,7 @@ hfs_vnop_exchange(ap)
        int error = 0, started_tr = 0, got_cookie = 0;
        cat_cookie_t cookie;
        time_t orig_from_ctime, orig_to_ctime;
+       bool have_cnode_locks = false, have_from_trunc_lock = false, have_to_trunc_lock = false;
 
        /*
         * VFS does the following checks:
@@ -1925,9 +2082,12 @@ hfs_vnop_exchange(ap)
         * 3. Validate that they're not the same vnode.
         */
 
-       orig_from_ctime = VTOC(from_vp)->c_ctime;
-       orig_to_ctime = VTOC(to_vp)->c_ctime;
+       from_cp = VTOC(from_vp);
+       to_cp = VTOC(to_vp);
+       hfsmp = VTOHFS(from_vp);
 
+       orig_from_ctime = from_cp->c_ctime;
+       orig_to_ctime = to_cp->c_ctime;
 
 #if CONFIG_PROTECT
        /* 
@@ -1941,19 +2101,25 @@ hfs_vnop_exchange(ap)
 #endif
 
 #if HFS_COMPRESSION
-       if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) {
-               if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) {
-                       return error;
+       if (!ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) {
+               if ( hfs_file_is_compressed(from_cp, 0) ) {
+                       if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) {
+                               return error;
+                       }
                }
-       }
-       
-       if ( hfs_file_is_compressed(VTOC(to_vp), 0) ) {
-               if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) {
-                       return error;
+
+               if ( hfs_file_is_compressed(to_cp, 0) ) {
+                       if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) {
+                               return error;
+                       }
                }
        }
 #endif // HFS_COMPRESSION
-       
+
+       // Resource forks cannot be exchanged.
+       if (VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp))
+               return EINVAL;
+
        /* 
         * Normally, we want to notify the user handlers about the event,
         * except if it's a handler driving the event.
@@ -1962,69 +2128,100 @@ hfs_vnop_exchange(ap)
                check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
                check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
        } else {
-               /* 
-                * We're doing a data-swap.
-                * Take the truncate lock/cnode lock, then verify there are no mmap references.
-                * Issue a hfs_filedone to flush out all of the remaining state for this file.
-                * Allow the rest of the codeflow to re-acquire the cnode locks in order.
+               /*
+                * This is currently used by mtmd so we should tidy up the
+                * file now because the data won't be used again in the
+                * destination file.
                 */
-               
-               hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);   
-                       
-               if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
-                       hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT);
-                       return error;
-               }
+               hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, 0);
+               hfs_lock_always(from_cp, HFS_EXCLUSIVE_LOCK);
+               hfs_filedone(from_vp, ap->a_context, HFS_FILE_DONE_NO_SYNC);
+               hfs_unlock(from_cp);
+               hfs_unlock_truncate(from_cp, 0);
+
+               // Flush all the data from the source file
+               error = ubc_msync(from_vp, 0, ubc_getsize(from_vp), NULL,
+                                                 UBC_PUSHALL | UBC_SYNC);
+               if (error)
+                       goto exit;
 
-               /* Verify the source file is not in use by anyone besides us (including mmap refs) */
-               if (vnode_isinuse(from_vp, 1)) {
-                       error = EBUSY;
-                       hfs_unlock(VTOC(from_vp));
-                       hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT);
-                       return error;
+#if HFS_COMPRESSION
+               /*
+                * If this is a compressed file, we need to do the same for
+                * the resource fork.
+                */
+               if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) {
+                       error = hfs_flush_rsrc(from_vp, ap->a_context);
+                       if (error)
+                               goto exit;
                }
+#endif
 
-               /* Flush out the data in the source file */
-               VTOC(from_vp)->c_flag |= C_SWAPINPROGRESS;
-               error = hfs_filedone (from_vp, ap->a_context);
-               VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS;
-               hfs_unlock(VTOC(from_vp));
-               hfs_unlock_truncate(VTOC(from_vp), HFS_LOCK_DEFAULT);
+               /*
+                * We're doing a data-swap so we need to take the truncate
+                * lock exclusively.  We need an exclusive lock because we
+                * will be completely truncating the source file and we must
+                * make sure nobody else sneaks in and trys to issue I/O
+                * whilst we don't have the cnode lock.
+                *
+                * After taking the truncate lock we do a quick check to
+                * verify there are no other references (including mmap
+                * references), but we must remember that this does not stop
+                * anybody coming in later and taking a reference.  We will
+                * have the truncate lock exclusively so that will prevent
+                * them from issuing any I/O.
+                */
 
-               if (error) {
-                       return error;
+               if (to_cp < from_cp) {
+                       hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+                       have_to_trunc_lock = true;
                }
-       }
 
-       if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK)))
-               return (error);
+               hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+               have_from_trunc_lock = true;
 
-       from_cp = VTOC(from_vp);
-       to_cp = VTOC(to_vp);
-       hfsmp = VTOHFS(from_vp);
+               /*
+                * Do an early check to verify the source is not in use by
+                * anyone.  We should be called from an FD opened as F_EVTONLY
+                * so that doesn't count as a reference.
+                */
+               if (vnode_isinuse(from_vp, 0)) {
+                       error = EBUSY;
+                       goto exit;
+               }
 
-       /* Resource forks cannot be exchanged. */
-       if ( VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) {
-               error = EINVAL;
-               goto exit;
+               if (to_cp >= from_cp) {
+                       hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+                       have_to_trunc_lock = true;
+               }
        }
 
+       if ((error = hfs_lockpair(from_cp, to_cp, HFS_EXCLUSIVE_LOCK)))
+               goto exit;
+       have_cnode_locks = true;
+
        // Don't allow modification of the journal or journal_info_block
        if (hfs_is_journal_file(hfsmp, from_cp) ||
            hfs_is_journal_file(hfsmp, to_cp)) {
                error = EPERM;
                goto exit;
        }
-       
+
        /* 
         * Ok, now that all of the pre-flighting is done, call the underlying
         * function if needed.
         */
-       if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) {
-               error = hfs_movedata(from_vp, to_vp);
+       if (ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) {
+#if HFS_COMPRESSION
+               if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) {
+                       error = hfs_move_compressed(from_cp, to_cp);
+                       goto exit;
+               }
+#endif
+
+               error = hfs_move_data(from_cp, to_cp, 0);
                goto exit;
        }
-       
 
        if ((error = hfs_start_transaction(hfsmp)) != 0) {
            goto exit;
@@ -2068,21 +2265,31 @@ hfs_vnop_exchange(ap)
        }
 
        /*
-        * ExchangeFileIDs swaps the extent information attached to two
-        * different file IDs.  It also swaps the extent information that
-        * may live in the extents-overflow B-Tree.
+        * ExchangeFileIDs swaps the on-disk, or in-BTree extent information 
+        * attached to two different file IDs.  It also swaps the extent
+        * information that may live in the extents-overflow B-Tree.
         *
         * We do this in a transaction as this may require a lot of B-Tree nodes
         * to do completely, particularly if one of the files in question
-        * has a lot of extents.
+        * has a lot of extents. 
         *
         * For example, assume "file1" has fileID 50, and "file2" has fileID 52.
         * For the on-disk records, which are assumed to be synced, we will
         * first swap the resident inline-8 extents as part of the catalog records.
         * Then we will swap any extents overflow records for each file.
         * 
-        * When this function is done, "file1" will have fileID 52, and "file2" will
-        * have fileID 50.
+        * When ExchangeFileIDs returns successfully, "file1" will have fileID 52, 
+        * and "file2" will have fileID 50.  However, note that this is only 
+        * approximately half of the work that exchangedata(2) will need to 
+        * accomplish.  In other words, we swap "too much" of the information 
+        * because if we only called ExchangeFileIDs, both the fileID and extent 
+        * information would be the invariants of this operation.  We don't 
+        * actually want that; we want to conclude with "file1" having 
+        * file ID 50, and "file2" having fileID 52.
+        * 
+        * The remainder of hfs_vnop_exchange will swap the file ID and other cnode
+        * data back to the proper ownership, while still allowing the cnode to remain
+        * pointing at the same set of extents that it did originally.
         */
        error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid,
                                to_parid, from_cp->c_hint, to_cp->c_hint);
@@ -2110,7 +2317,6 @@ hfs_vnop_exchange(ap)
                hfs_incr_gencount (to_cp);
        }
 
-
        /* Save a copy of "from" attributes before swapping. */
        bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc));
        bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr));
@@ -2124,7 +2330,7 @@ hfs_vnop_exchange(ap)
        to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS);
 
        /*
-        * Complete the in-memory portion of the copy.
+        * Now complete the in-memory portion of the copy.
         *
         * ExchangeFileIDs swaps the on-disk records involved.  We complete the
         * operation by swapping the in-memory contents of the two files here.
@@ -2197,7 +2403,7 @@ hfs_vnop_exchange(ap)
         * 2) Drop the special bits from the current flags
         * 3) swap the special flag bits to their destination
         */      
-       from_cp->c_flag |= to_flag_special;
+       from_cp->c_flag |= to_flag_special | C_MODIFIED;
        from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags;
        bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32);
 
@@ -2226,7 +2432,7 @@ hfs_vnop_exchange(ap)
         * Only OR in the "from" flags into our cnode flags below. 
         * Leave the rest of the flags alone.
         */
-       to_cp->c_flag |= from_flag_special;
+       to_cp->c_flag |= from_flag_special | C_MODIFIED;
 
        to_cp->c_attr.ca_recflags = tempattr.ca_recflags;
        bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32);
@@ -2252,29 +2458,89 @@ hfs_vnop_exchange(ap)
 
 exit:
        if (got_cookie) {
-               cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context));
+               cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context));
        }
        if (started_tr) {
            hfs_end_transaction(hfsmp);
        }
 
-       hfs_unlockpair(from_cp, to_cp);
-       return (error);
+       if (have_cnode_locks)
+               hfs_unlockpair(from_cp, to_cp);
+
+       if (have_from_trunc_lock)
+               hfs_unlock_truncate(from_cp, 0);
+
+       if (have_to_trunc_lock)
+               hfs_unlock_truncate(to_cp, 0);
+
+       return (error);
 }
 
+#if HFS_COMPRESSION
+/*
+ * This function is used specifically for the case when a namespace
+ * handler is trying to steal data before it's deleted.  Note that we
+ * don't bother deleting the xattr from the source because it will get
+ * deleted a short time later anyway.
+ *
+ * cnodes must be locked
+ */
+static int hfs_move_compressed(cnode_t *from_cp, cnode_t *to_cp)
+{
+       int     ret;
+       void   *data    = NULL;
+
+       CLR(from_cp->c_bsdflags, UF_COMPRESSED);
+       SET(from_cp->c_flag, C_MODIFIED);
+
+       ret = hfs_move_data(from_cp, to_cp, HFS_MOVE_DATA_INCLUDE_RSRC);
+       if (ret)
+               goto exit;
+
+       /*
+        * Transfer the xattr that decmpfs uses.  Ideally, this code
+        * should be with the other decmpfs code but it's file system
+        * agnostic and this path is currently, and likely to remain, HFS+
+        * specific.  It's easier and more performant if we implement it
+        * here.
+        */
+
+       size_t size = MAX_DECMPFS_XATTR_SIZE;
+       MALLOC(data, void *, size, M_TEMP, M_WAITOK);
+
+       ret = hfs_xattr_read(from_cp->c_vp, DECMPFS_XATTR_NAME, data, &size);
+       if (ret)
+               goto exit;
+
+       ret = hfs_xattr_write(to_cp->c_vp, DECMPFS_XATTR_NAME, data, size);
+       if (ret)
+               goto exit;
+
+       SET(to_cp->c_bsdflags, UF_COMPRESSED);
+       SET(to_cp->c_flag, C_MODIFIED);
+
+exit:
+       if (data)
+               FREE(data, M_TEMP);
+
+       return ret;
+}
+#endif // HFS_COMPRESSION
+
 int
 hfs_vnop_mmap(struct vnop_mmap_args *ap)
 {
        struct vnode *vp = ap->a_vp;
+       cnode_t *cp = VTOC(vp);
        int error;
        
        if (VNODE_IS_RSRC(vp)) {
                /* allow pageins of the resource fork */
        } else {
-               int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
-               time_t orig_ctime = VTOC(vp)->c_ctime;
+               int compressed = hfs_file_is_compressed(cp, 1); /* 1 == don't take the cnode lock */
+               time_t orig_ctime = cp->c_ctime;
                
-               if (!compressed && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
+               if (!compressed && (cp->c_bsdflags & UF_COMPRESSED)) {
                        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
                        if (error != 0) {
                                return error;
@@ -2283,19 +2549,9 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap)
 
                if (ap->a_fflags & PROT_WRITE) {
                        check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
-
-                       /* even though we're manipulating a cnode field here, we're only monotonically increasing
-                        * the generation counter.  The vnode can't be recycled (because we hold a FD in order to cause the
-                        * map to happen).  So it's safe to do this without holding the cnode lock.  The caller's only 
-                         * requirement is that the number has been changed.
-                        */
-                       struct cnode *cp = VTOC(vp);
-                       if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
-                               hfs_incr_gencount(cp);
-                       }
                }
        }
-       
+
        //
        // NOTE: we return ENOTSUP because we want the cluster layer
        //       to actually do all the real work.
@@ -2303,64 +2559,106 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap)
        return (ENOTSUP);
 }
 
+static errno_t hfs_vnop_mnomap(struct vnop_mnomap_args *ap)
+{
+       vnode_t vp = ap->a_vp;
+
+       /*
+        * Whilst the file was mapped, there may not have been any
+        * page-outs so we need to increment the generation counter now.
+        * Unfortunately this may lead to a change in the generation
+        * counter when no actual change has been made, but there is
+        * little we can do about that with our current architecture.
+        */
+       if (ubc_is_mapped_writable(vp)) {
+               cnode_t *cp = VTOC(vp);
+               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+               hfs_incr_gencount(cp);
+
+               /*
+                * We don't want to set the modification time here since a
+                * change to that is not acceptable if no changes were made.
+                * Instead we set a flag so that if we get any page-outs we
+                * know to update the modification time.  It's possible that
+                * they weren't actually because of changes made whilst the
+                * file was mapped but that's not easy to fix now.
+                */
+               SET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING);
+
+               hfs_unlock(cp);
+       }
+
+       return 0;
+}
+
+/*
+ * Mark the resource fork as needing a ubc_setsize when we drop the
+ * cnode lock later.
+ */
+static void hfs_rsrc_setsize(cnode_t *cp)
+{
+       /*
+        * We need to take an iocount if we don't have one.  vnode_get
+        * will return ENOENT if the vnode is terminating which is what we
+        * want as it's not safe to call ubc_setsize in that case.
+        */
+       if (cp->c_rsrc_vp && !vnode_get(cp->c_rsrc_vp)) {
+               // Shouldn't happen, but better safe...
+               if (ISSET(cp->c_flag, C_NEED_RVNODE_PUT))
+                       vnode_put(cp->c_rsrc_vp);
+               SET(cp->c_flag, C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE);
+       }
+}
+
 /*
- * hfs_movedata
+ * hfs_move_data
  *
  * This is a non-symmetric variant of exchangedata.  In this function,
- * the contents of the fork in from_vp are moved to the fork
- * specified by to_vp.  
+ * the contents of the data fork (and optionally the resource fork)
+ * are moved from from_cp to to_cp.
  * 
- * The cnodes pointed to by 'from_vp' and 'to_vp' must be locked. 
+ * The cnodes must be locked. 
  *
- * The vnode pointed to by 'to_vp' *must* be empty prior to invoking this function.
- * We impose this restriction because we may not be able to fully delete the entire 
- * file's contents in a single transaction, particularly if it has a lot of extents.
- * In the normal file deletion codepath, the file is screened for two conditions:
- * 1) bigger than 400MB, and 2) more than 8 extents.  If so, the file is relocated to 
- * the hidden directory and the deletion is broken up into multiple truncates.  We can't
- * do that here because both files need to exist in the namespace. The main reason this
- * is imposed is that we may have to touch a whole lot of bitmap blocks if there are 
- * many extents.
+ * The cnode pointed to by 'to_cp' *must* be empty prior to invoking
+ * this function.  We impose this restriction because we may not be
+ * able to fully delete the entire file's contents in a single
+ * transaction, particularly if it has a lot of extents.  In the
+ * normal file deletion codepath, the file is screened for two
+ * conditions: 1) bigger than 400MB, and 2) more than 8 extents.  If
+ * so, the file is relocated to the hidden directory and the deletion
+ * is broken up into multiple truncates.  We can't do that here
+ * because both files need to exist in the namespace. The main reason
+ * this is imposed is that we may have to touch a whole lot of bitmap
+ * blocks if there are many extents.
  * 
- * Any data written to 'from_vp' after this call completes is not guaranteed
- * to be moved. 
+ * Any data written to 'from_cp' after this call completes is not
+ * guaranteed to be moved.
  * 
  * Arguments:
- * vnode from_vp: source file
- * vnode to_vp: destination file; must be empty
+ * cnode_t *from_cp : source file
+ * cnode_t *to_cp   : destination file; must be empty
  * 
  * Returns:
+ *  
+ *  EBUSY - File has been deleted or is in use
  *     EFBIG - Destination file was not empty
- *     0       - success
- * 
- * 
+ *  EIO   - An I/O error
+ *     0         - success
+ *  other - Other errors that can be returned from called functions
  */
-int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
-       
-       struct cnode *from_cp;
-       struct cnode *to_cp;
-       struct hfsmount *hfsmp = NULL;
-       int error = 0;
-       int started_tr = 0;
-       int lockflags = 0;
-       int overflow_blocks;
-       int rsrc = 0;
-       
-       
-       /* Get the HFS pointers */
-       from_cp = VTOC(from_vp);
-       to_cp = VTOC(to_vp);
-       hfsmp = VTOHFS(from_vp);
-       
-       /* Verify that neither source/dest file is open-unlinked */
-       if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) {
-               error = EBUSY;
-               goto movedata_exit;
-       }
+int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, 
+                                 hfs_move_data_options_t options)
+{      
+       hfsmount_t *hfsmp                       = VTOHFS(from_cp->c_vp);
+       int error                                       = 0;
+       int lockflags                           = 0;
+       bool return_EIO_on_error        = false;
+       const bool include_rsrc         = ISSET(options, HFS_MOVE_DATA_INCLUDE_RSRC);
 
-       if (to_cp->c_flag & (C_DELETED | C_NOEXISTS)) {
-               error = EBUSY;
-               goto movedata_exit;
+       /* Verify that neither source/dest file is open-unlinked */
+       if (ISSET(from_cp->c_flag, C_DELETED | C_NOEXISTS)
+               || ISSET(to_cp->c_flag, C_DELETED | C_NOEXISTS)) {
+               return EBUSY;
        }
 
        /* 
@@ -2375,248 +2673,276 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
         * As a result, we shouldn't have any active usecounts against
         * this vnode when we go to check it below.
         */
-       if (vnode_isinuse(from_vp, 0)) {
-               error = EBUSY;
-               goto movedata_exit;
-       }
+       if (vnode_isinuse(from_cp->c_vp, 0))
+               return EBUSY;
 
-       if (from_cp->c_rsrc_vp == from_vp) {
-               rsrc = 1;
-       }
-       
-       /* 
-        * We assume that the destination file is already empty. 
-        * Verify that it is.
-        */
-       if (rsrc) {
-               if (to_cp->c_rsrcfork->ff_size > 0) {
-                       error = EFBIG;
-                       goto movedata_exit;
-               }
-       }       
-       else {
-               if (to_cp->c_datafork->ff_size > 0) {
-                       error = EFBIG;
-                       goto movedata_exit;
-               }
-       }
-       
-       /* If the source has the rsrc open, make sure the destination is also the rsrc */
-       if (rsrc) {
-               if (to_vp != to_cp->c_rsrc_vp) {
-                       error = EINVAL;
-                       goto movedata_exit;
+       if (include_rsrc && from_cp->c_rsrc_vp) {
+               if (vnode_isinuse(from_cp->c_rsrc_vp, 0))
+                       return EBUSY;
+
+               /* 
+                * In the code below, if the destination file doesn't have a
+                * c_rsrcfork then we don't create it which means we cannot
+                * transfer the ff_invalidranges and cf_vblocks fields.  These
+                * shouldn't be set because we flush the resource fork before
+                * calling this function but there is a tiny window when we
+                * did not have any locks...
+                */
+               if (!to_cp->c_rsrcfork
+                       && (!TAILQ_EMPTY(&from_cp->c_rsrcfork->ff_invalidranges)
+                               || from_cp->c_rsrcfork->ff_unallocblocks)) {
+                       /*
+                        * The file isn't really busy now but something did slip
+                        * in and tinker with the file while we didn't have any
+                        * locks, so this is the most meaningful return code for
+                        * the caller.
+                        */
+                       return EBUSY;
                }
        }
-       else {
-               /* Verify that both forks are data forks */
-               if (to_vp != to_cp->c_vp) {
-                       error = EINVAL;
-                       goto movedata_exit;
-               }          
-       }
-       
-       /* 
-        * See if the source file has overflow extents.  If it doesn't, we don't
-        * need to call into MoveData, and the catalog will be enough.
-        */
-       if (rsrc) {
-               overflow_blocks = overflow_extents(from_cp->c_rsrcfork);
-       }
-       else {
-               overflow_blocks = overflow_extents(from_cp->c_datafork);
-       }       
-       
-       if ((error = hfs_start_transaction (hfsmp)) != 0) {
-               goto movedata_exit;
+
+       // Check the destination file is empty
+       if (to_cp->c_datafork->ff_blocks 
+               || to_cp->c_datafork->ff_size
+               || (include_rsrc
+                       && (to_cp->c_blocks
+                               || (to_cp->c_rsrcfork && to_cp->c_rsrcfork->ff_size)))) {
+               return EFBIG;
        }
-       started_tr = 1;
-       
-       /* Lock the system files: catalog, extents, attributes */
-       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-       
-       /* Copy over any catalog allocation data into the new spot. */
-       if (rsrc) {
-               if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){
-                       hfs_systemfile_unlock(hfsmp, lockflags);
-                       goto movedata_exit;
+
+       if ((error = hfs_start_transaction (hfsmp)))
+               return error;
+
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, 
+                                                                       HFS_EXCLUSIVE_LOCK);
+
+       // filefork_t is 128 bytes which should be OK
+       filefork_t rfork_buf, *from_rfork = NULL;
+
+       if (include_rsrc) {
+               from_rfork = from_cp->c_rsrcfork;
+
+               /*
+                * Creating resource fork vnodes is expensive, so just get
+                * the fork data if we need it.
+                */
+               if (!from_rfork && hfs_has_rsrc(from_cp)) {
+                       from_rfork = &rfork_buf;
+
+                       from_rfork->ff_cp = from_cp;
+                       TAILQ_INIT(&from_rfork->ff_invalidranges);
+
+                       error = cat_idlookup(hfsmp, from_cp->c_fileid, 0, 1, NULL, NULL,
+                                                                &from_rfork->ff_data);
+
+                       if (error)
+                               goto exit;
                }
        }
-       else {
-               if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, to_cp->c_datafork, to_cp))) {
-                       hfs_systemfile_unlock(hfsmp, lockflags);
-                       goto movedata_exit;
-               }
+
+       /*
+        * From here on, any failures mean that we might be leaving things
+        * in a weird or inconsistent state.  Ideally, we should back out
+        * all the changes, but to do that properly we need to fix
+        * MoveData.  We'll save fixing that for another time.  For now,
+        * just return EIO in all cases to the caller so that they know.
+        */
+       return_EIO_on_error = true;
+
+       bool data_overflow_extents = overflow_extents(from_cp->c_datafork);
+
+       // Move the data fork
+       if ((error = hfs_move_fork (from_cp->c_datafork, from_cp,
+                                                               to_cp->c_datafork, to_cp))) {
+               goto exit;
        }
-       
+
+       SET(from_cp->c_flag, C_NEED_DATA_SETSIZE);
+       SET(to_cp->c_flag, C_NEED_DATA_SETSIZE);
+
+       // We move the resource fork later
+
        /* 
-        * Note that because all we're doing is moving the extents around, we can 
-        * probably do this in a single transaction:  Each extent record (group of 8) 
-        * is 64 bytes.  A extent overflow B-Tree node is typically 4k.  This means 
-        * each node can hold roughly ~60 extent records == (480 extents).
+        * Note that because all we're doing is moving the extents around,
+        * we can probably do this in a single transaction: Each extent
+        * record (group of 8) is 64 bytes.  A extent overflow B-Tree node
+        * is typically 4k.  This means each node can hold roughly ~60
+        * extent records == (480 extents).
         *
-        * If a file was massively fragmented and had 20k extents, this means we'd 
-        * roughly touch 20k/480 == 41 to 42 nodes, plus the index nodes, for half 
-        * of the operation.  (inserting or deleting). So if we're manipulating 80-100 
-        * nodes, this is basically 320k of data to write to the journal in
-        * a bad case.  
+        * If a file was massively fragmented and had 20k extents, this
+        * means we'd roughly touch 20k/480 == 41 to 42 nodes, plus the
+        * index nodes, for half of the operation.  (inserting or
+        * deleting). So if we're manipulating 80-100 nodes, this is
+        * basically 320k of data to write to the journal in a bad case.
         */
-       if (overflow_blocks != 0) {
-               if (rsrc) {
-                       error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1);
-               }
-               else {
-                       error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0);
-               }
+       if (data_overflow_extents) {
+               if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0)))
+                       goto exit;
        }
-       
-       if (error) {
-               /* Reverse the operation. Copy the fork data back into the source */
-               if (rsrc) {
-                       hfs_move_fork (to_cp->c_rsrcfork, to_cp, from_cp->c_rsrcfork, from_cp);
-               }
-               else {
-                       hfs_move_fork (to_cp->c_datafork, to_cp, from_cp->c_datafork, from_cp);
-               }
+
+       if (from_rfork && overflow_extents(from_rfork)) {
+               if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1)))
+                       goto exit;
        }
-       else {
-               struct cat_fork *src_data = NULL;
-               struct cat_fork *src_rsrc = NULL;
-               struct cat_fork *dst_data = NULL;
-               struct cat_fork *dst_rsrc = NULL;
-               
-               /* Touch the times*/
-               to_cp->c_touch_acctime = TRUE;
-               to_cp->c_touch_chgtime = TRUE;
-               to_cp->c_touch_modtime = TRUE;
-               
-               from_cp->c_touch_acctime = TRUE;
-               from_cp->c_touch_chgtime = TRUE;
-               from_cp->c_touch_modtime = TRUE;
-               
-               hfs_touchtimes(hfsmp, to_cp);
-               hfs_touchtimes(hfsmp, from_cp);
-               
-               if (from_cp->c_datafork) {
-                       src_data = &from_cp->c_datafork->ff_data;
-               }
-               if (from_cp->c_rsrcfork) {
-                       src_rsrc = &from_cp->c_rsrcfork->ff_data;
-               }
-               
-               if (to_cp->c_datafork) {
-                       dst_data = &to_cp->c_datafork->ff_data;
-               }
-               if (to_cp->c_rsrcfork) {
-                       dst_rsrc = &to_cp->c_rsrcfork->ff_data;
+
+       // Touch times
+       from_cp->c_touch_acctime = TRUE;
+       from_cp->c_touch_chgtime = TRUE;
+       from_cp->c_touch_modtime = TRUE;
+       hfs_touchtimes(hfsmp, from_cp);
+
+       to_cp->c_touch_acctime = TRUE;
+       to_cp->c_touch_chgtime = TRUE;
+       to_cp->c_touch_modtime = TRUE;
+       hfs_touchtimes(hfsmp, to_cp);
+
+       struct cat_fork dfork_buf;
+       const struct cat_fork *dfork, *rfork;
+
+       dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, NULL,
+                                                                               &dfork_buf, hfsmp->blockSize);
+       rfork = hfs_prepare_fork_for_update(from_rfork, NULL,
+                                                                               &rfork_buf.ff_data, hfsmp->blockSize);
+
+       // Update the catalog nodes, to_cp first
+       if ((error = cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, 
+                                                       dfork, rfork))) {
+               goto exit;
+       }
+
+       CLR(to_cp->c_flag, C_MODIFIED | C_MINOR_MOD);
+
+       // Update in-memory resource fork data here
+       if (from_rfork) {
+               // Update c_blocks
+               uint32_t moving = from_rfork->ff_blocks + from_rfork->ff_unallocblocks;
+
+               from_cp->c_blocks -= moving;
+               to_cp->c_blocks   += moving;
+
+               // Update to_cp's resource data if it has it
+               filefork_t *to_rfork = to_cp->c_rsrcfork;
+               if (to_rfork) {
+                       to_rfork->ff_invalidranges = from_rfork->ff_invalidranges;
+                       to_rfork->ff_data                  = from_rfork->ff_data;
+
+                       // Deal with ubc_setsize
+                       hfs_rsrc_setsize(to_cp);
                }
-               
-               /* Update the catalog nodes */
-               (void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, 
-                                                 src_data, src_rsrc);
-               
-               (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, 
-                                                 dst_data, dst_rsrc);
-               
+
+               // Wipe out the resource fork in from_cp
+               rl_init(&from_rfork->ff_invalidranges);
+               bzero(&from_rfork->ff_data, sizeof(from_rfork->ff_data));
+
+               // Deal with ubc_setsize
+               hfs_rsrc_setsize(from_cp);
        }
-       /* unlock the system files */
-       hfs_systemfile_unlock(hfsmp, lockflags);
-       
-       
-movedata_exit:
-       if (started_tr) {
+
+       // Currently unnecessary, but might be useful in future...
+       dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, NULL, &dfork_buf,
+                                                                               hfsmp->blockSize);
+       rfork = hfs_prepare_fork_for_update(from_rfork, NULL, &rfork_buf.ff_data,
+                                                                               hfsmp->blockSize);
+
+       // Update from_cp
+       if ((error = cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, 
+                                                       dfork, rfork))) {
+               goto exit;
+       }
+
+       CLR(from_cp->c_flag, C_MODIFIED | C_MINOR_MOD);
+
+exit:
+       if (lockflags) {
+               hfs_systemfile_unlock(hfsmp, lockflags);
                hfs_end_transaction(hfsmp);
        }
-       
+
+       if (error && error != EIO && return_EIO_on_error) {
+               printf("hfs_move_data: encountered error %d\n", error);
+               error = EIO;
+       }
+
        return error;
-       
-}              
+}
 
 /* 
- * Copy all of the catalog and runtime data in srcfork to dstfork.
+ * Move all of the catalog and runtime data in srcfork to dstfork.
  * 
- * This allows us to maintain the invalid ranges across the movedata operation so 
- * we don't need to force all of the pending IO right now. In addition, we move all
- * non overflow-extent extents into the destination here.
+ * This allows us to maintain the invalid ranges across the move data
+ * operation so we don't need to force all of the pending IO right
+ * now. In addition, we move all non overflow-extent extents into the
+ * destination here.
+ *
+ * The destination fork must be empty and should have been checked
+ * prior to calling this.
  */
-static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp,
-                                                 struct filefork *dstfork, struct cnode *dst_cp) {
-       struct rl_entry *invalid_range;
-       int size = sizeof(struct HFSPlusExtentDescriptor);
-       size = size * kHFSPlusExtentDensity;
-       
-       /* If the dstfork has any invalid ranges, bail out */
-       invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges);
-       if (invalid_range != NULL) {
-               return EFBIG;
-       }
-       
-       if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) {
-               return EFBIG;
-       }
-       
-       /* First copy the invalid ranges */
-       while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) {
-               off_t start = invalid_range->rl_start;
-               off_t end = invalid_range->rl_end;
-               
-               /* Remove it from the srcfork and add it to dstfork */
-               rl_remove(start, end, &srcfork->ff_invalidranges);
-               rl_add(start, end, &dstfork->ff_invalidranges);
-       }
-       
-       /* 
-        * Ignore the ff_union.  We don't move symlinks or system files.  
-        * Now copy the in-catalog extent information
-        */
-       dstfork->ff_data.cf_size = srcfork->ff_data.cf_size;
-       dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size;
-       dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks;
-       dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks;
-       
-       /* just memcpy the whole array of extents to the new location. */
-       memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size);
-       
-       /* 
-        * Copy the cnode attribute data.
-        *
-        */
-       src_cp->c_blocks -= srcfork->ff_data.cf_vblocks;
-       src_cp->c_blocks -= srcfork->ff_data.cf_blocks;
-       
-       dst_cp->c_blocks += srcfork->ff_data.cf_vblocks;
-       dst_cp->c_blocks += srcfork->ff_data.cf_blocks;
-       
-       /* Now delete the entries in the source fork */
-       srcfork->ff_data.cf_size = 0;
-       srcfork->ff_data.cf_new_size = 0;
-       srcfork->ff_data.cf_union.cfu_bytesread = 0;
-       srcfork->ff_data.cf_vblocks = 0;
-       srcfork->ff_data.cf_blocks = 0;
-       
-       /* Zero out the old extents */
-       bzero (srcfork->ff_data.cf_extents, size);
+static int hfs_move_fork(filefork_t *srcfork, cnode_t *src_cp,
+                                                filefork_t *dstfork, cnode_t *dst_cp) 
+{
+       // Move the invalid ranges
+       TAILQ_SWAP(&dstfork->ff_invalidranges, &srcfork->ff_invalidranges,
+                          rl_entry, rl_link);
+       rl_remove_all(&srcfork->ff_invalidranges);
+
+       // Move the fork data (copy whole structure)
+       dstfork->ff_data = srcfork->ff_data;
+       bzero(&srcfork->ff_data, sizeof(srcfork->ff_data));
+
+       // Update c_blocks
+       src_cp->c_blocks -= dstfork->ff_blocks + dstfork->ff_unallocblocks;
+       dst_cp->c_blocks += dstfork->ff_blocks + dstfork->ff_unallocblocks;
+
        return 0;
 }
-       
+
+
+#include <i386/panic_hooks.h>
+
+struct hfs_fsync_panic_hook {
+       panic_hook_t hook;
+       struct cnode *cp;
+};
+
+static void hfs_fsync_panic_hook(panic_hook_t *hook_)
+{
+       struct hfs_fsync_panic_hook *hook = (struct hfs_fsync_panic_hook *)hook_;
+       extern int kdb_log(const char *fmt, ...);
+
+       // Get the physical region just before cp
+       panic_phys_range_t range;
+       uint64_t phys;
+
+       if (panic_phys_range_before(hook->cp, &phys, &range)) {
+               kdb_log("cp = %p, phys = %p, prev (%p: %p-%p)\n", 
+                               hook->cp, phys, range.type, range.phys_start,
+                               range.phys_start + range.len);
+       } else
+               kdb_log("cp = %p, phys = %p, prev (!)\n", hook->cp, phys);
+
+       panic_dump_mem((void *)(((vm_offset_t)hook->cp - 4096) & ~4095), 12288);
+
+       kdb_log("\n");
+}
+
 
 /*
  *  cnode must be locked
  */
 int
-hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
+hfs_fsync(struct vnode *vp, int waitfor, hfs_fsync_mode_t fsyncmode, struct proc *p)
 {
        struct cnode *cp = VTOC(vp);
        struct filefork *fp = NULL;
        int retval = 0;
        struct hfsmount *hfsmp = VTOHFS(vp);
-       struct rl_entry *invalid_range;
        struct timeval tv;
        int waitdata;           /* attributes necessary for data retrieval */
        int wait;               /* all other attributes (e.g. atime, etc.) */
        int lockflag;
        int took_trunc_lock = 0;
        int locked_buffers = 0;
+       int fsync_default = 1;
 
        /*
         * Applications which only care about data integrity rather than full
@@ -2625,8 +2951,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
         */
        wait = (waitfor == MNT_WAIT);
        waitdata = (waitfor == MNT_DWAIT) | wait;
+
        if (always_do_fullfsync)
-               fullsync = 1;
+               fsyncmode = HFS_FSYNC_FULL;
+       if (fsyncmode != HFS_FSYNC)
+               fsync_default = 0;
        
        /* HFS directories don't have any data blocks. */
        if (vnode_isdir(vp))
@@ -2649,11 +2978,18 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
                hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
                took_trunc_lock = 1;
 
+               struct hfs_fsync_panic_hook hook;
+               hook.cp = cp;
+               panic_hook(&hook.hook, hfs_fsync_panic_hook);
+
                if (fp->ff_unallocblocks != 0) {
                        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 
                        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
                }
+
+               panic_unhook(&hook.hook);
+
                /* Don't hold cnode lock when calling into cluster layer. */
                (void) cluster_push(vp, waitdata ? IO_SYNC : 0);
 
@@ -2673,7 +3009,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
                cp->c_zftimeout != 0))) {
 
                microuptime(&tv);
-               if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && !fullsync && tv.tv_sec < (long)cp->c_zftimeout) {
+               if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && fsync_default && tv.tv_sec < (long)cp->c_zftimeout) {
                        /* Remember that a force sync was requested. */
                        cp->c_flag |= C_ZFWANTSYNC;
                        goto datasync;
@@ -2688,30 +3024,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
                                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                                took_trunc_lock = 1;
                        }
-                       while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
-                               off_t start = invalid_range->rl_start;
-                               off_t end = invalid_range->rl_end;
-               
-                               /* The range about to be written must be validated
-                                * first, so that VNOP_BLOCKMAP() will return the
-                                * appropriate mapping for the cluster code:
-                                */
-                               rl_remove(start, end, &fp->ff_invalidranges);
-
-                               /* Don't hold cnode lock when calling into cluster layer. */
-                               hfs_unlock(cp);
-                               (void) cluster_write(vp, (struct uio *) 0,
-                                                    fp->ff_size, end + 1, start, (off_t)0,
-                                                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
-                               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-                               cp->c_flag |= C_MODIFIED;
-                       }
+                       hfs_flush_invalid_ranges(vp);
                        hfs_unlock(cp);
                        (void) cluster_push(vp, waitdata ? IO_SYNC : 0);
                        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                }
-               cp->c_flag &= ~C_ZFWANTSYNC;
-               cp->c_zftimeout = 0;
        }
 datasync:
        if (took_trunc_lock) {
@@ -2758,7 +3075,7 @@ datasync:
                 * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set).
                 */
                if (vnode_isrecycled(vp)) {
-                       fullsync = 1;
+                       fsync_default = 0;
                }
        }
 
@@ -2772,7 +3089,7 @@ metasync:
                cp->c_touch_chgtime = FALSE;
                cp->c_touch_modtime = FALSE;
        } else if ( !(vp->v_flag & VSWAP) ) /* User file */ {
-               retval = hfs_update(vp, wait);
+               retval = hfs_update(vp, HFS_UPDATE_FORCE);
 
                /*
                 * When MNT_WAIT is requested push out the catalog record for
@@ -2780,7 +3097,7 @@ metasync:
                 * because the journal_flush or hfs_metasync_all will push out
                 * all of the metadata changes.
                 */
-               if ((retval == 0) && wait && !fullsync && cp->c_hint &&
+               if ((retval == 0) && wait && fsync_default && cp->c_hint &&
                    !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) {
                        hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p);
                }
@@ -2789,27 +3106,24 @@ metasync:
                 * If this was a full fsync, make sure all metadata
                 * changes get to stable storage.
                 */
-               if (fullsync) {
+               if (!fsync_default) {
                        if (hfsmp->jnl) {
-                               hfs_journal_flush(hfsmp, FALSE);
-                       
-                               if (journal_uses_fua(hfsmp->jnl)) {
-                                       /*
-                                        * the journal_flush did NOT issue a sync track cache command,
-                                        * and the fullsync indicates we are supposed to flush all cached
-                                        * data to the media, so issue the sync track cache command
-                                        * explicitly
-                                        */
-                                       VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
-                               }
+                               if (fsyncmode == HFS_FSYNC_FULL)
+                                       hfs_flush(hfsmp, HFS_FLUSH_FULL);
+                               else
+                                       hfs_flush(hfsmp,
+                                           HFS_FLUSH_JOURNAL_BARRIER);
                        } else {
                                retval = hfs_metasync_all(hfsmp);
                                /* XXX need to pass context! */
-                               VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+                               hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                        }
                }
        }
 
+       if (!hfs_is_dirty(cp) && !ISSET(cp->c_flag, C_DELETED))
+               vnode_cleardirty(vp);
+
        return (retval);
 }
 
@@ -3184,15 +3498,13 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        dcp->c_entries--;
                DEC_FOLDERCOUNT(hfsmp, dcp->c_attr);
                dcp->c_dirchangecnt++;
-               {
-                       struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16);
-                       extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-               }
+               hfs_incr_gencount(dcp);
+
                dcp->c_touch_chgtime = TRUE;
                dcp->c_touch_modtime = TRUE;
-               hfs_touchtimes(hfsmp, cp);
-               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
-               cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+               dcp->c_flag |= C_MODIFIED;
+
+               hfs_update(dcp->c_vp, 0);
        }
 
        hfs_systemfile_unlock(hfsmp, lockflags);
@@ -3209,6 +3521,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
        /* Mark C_NOEXISTS since the catalog entry is now gone */
        cp->c_flag |= C_NOEXISTS;
+
 out:
        dcp->c_flag &= ~C_DIR_MODIFICATION;
        wakeup((caddr_t)&dcp->c_flag);
@@ -3364,6 +3677,8 @@ relock:
         * more work.
         */
        if (error == 0) {
+               hfs_hotfile_deleted(vp);
+               
                if (rvp) {
                recycle_rsrc = 1;
                }
@@ -3385,8 +3700,8 @@ relock:
         * truncate lock)
         */
 rm_done:
-       hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        hfs_unlockpair(dcp, cp);
+       hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 
        if (recycle_rsrc) {
                /* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */
@@ -3488,23 +3803,6 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                return (EPERM);
        }
 
-       /*
-        * If removing a symlink, then we need to ensure that the
-        * data blocks for the symlink are not still in-flight or pending.  
-        * If so, we will unlink the symlink here, making its blocks 
-        * available for re-allocation by a subsequent transaction.  That is OK, but
-        * then the I/O for the data blocks could then go out before the journal 
-        * transaction that created it was flushed, leading to I/O ordering issues.
-        */
-       if (vp->v_type == VLNK) {       
-               /* 
-                * This will block if the asynchronous journal flush is in progress.
-                * If this symlink is not being renamed over and doesn't have any open FDs,
-                * then we'll remove it from the journal's bufs below in kill_block.
-                */
-               buf_wait_for_shadow_io (vp, 0);
-       }
-
        /*
         * Hard links require special handling.
         */
@@ -3572,9 +3870,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
        }
        
        /* Check if we have to break the deletion into multiple pieces. */
-       if (isdir == 0) {
-               isbigfile = ((cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE) && overflow_extents(VTOF(vp)));
-       }
+       if (isdir == 0)
+               isbigfile = cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE;
 
        /* Check if the file has xattrs.  If it does we'll have to delete them in
           individual transactions in case there are too many */
@@ -3763,10 +4060,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                                DEC_FOLDERCOUNT(hfsmp, dcp->c_attr);
                        }
                        dcp->c_dirchangecnt++;
-                       {
-                               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16);
-                               extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-                       }
+                       hfs_incr_gencount(dcp);
+
                        dcp->c_ctime = tv.tv_sec;
                        dcp->c_mtime = tv.tv_sec;
                        (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
@@ -3848,10 +4143,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        if (dcp->c_entries > 0)
                                dcp->c_entries--;
                        dcp->c_dirchangecnt++;
-                       {
-                               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16);
-                               extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-                       }
+                       hfs_incr_gencount(dcp);
+
                        dcp->c_ctime = tv.tv_sec;
                        dcp->c_mtime = tv.tv_sec;
                        (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
@@ -3919,7 +4212,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                         * to proceed and update the volume header and mark the cnode C_NOEXISTS.
                         * The subsequent fsck should be able to recover the free space for us.
                         */
-                       hfs_mark_volume_inconsistent(hfsmp);
+                       hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE);
                }
                else {
                        /* reset update_vh to 0, since hfs_release_storage should have done it for us */
@@ -3934,7 +4227,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                cp->c_flag |= C_NOEXISTS;
                cp->c_flag &= ~C_DELETED;
                
-               cp->c_touch_chgtime = TRUE;   /* XXX needed ? */
+               cp->c_touch_chgtime = TRUE;
                --cp->c_linkcount;
                
                /* 
@@ -4199,10 +4492,9 @@ relock:
                        goto retry;
                }
 
-               if (emit_rename && is_tracked) {
-                       resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT);
-               }
-       
+               /* If we want to reintroduce notifications for failed renames, this
+                  is the place to do it. */
+
                return (error);
        }
 
@@ -4475,6 +4767,54 @@ relock:
        /* Preflighting done, take fvp out of the name space. */
        cache_purge(fvp);
 
+#if CONFIG_SECLUDED_RENAME
+       /*
+        * Check for "secure" rename that imposes additional restrictions on the
+        * source vnode.  We wait until here to check in order to prevent a race
+        * with other threads that manage to look up fvp, but their open or link
+        * is blocked by our locks.  At this point, with fvp out of the name cache,
+        * and holding the lock on fdvp, no other thread can find fvp.
+        *
+        * TODO: Do we need to limit these checks to regular files only?
+        */
+       if (fcnp->cn_flags & CN_SECLUDE_RENAME) {
+               if (vnode_isdir(fvp)) {
+                       error = EISDIR;
+                       goto out;
+               }
+           
+               /*
+                * Neither fork of source may be open or memory mapped.
+                * We also don't want it in use by any other system call.
+                * The file must not have hard links.
+                *
+                * We can't simply use vnode_isinuse() because that does not
+                * count opens with O_EVTONLY.  We don't want a malicious
+                * process using O_EVTONLY to subvert a secluded rename.
+                */
+               if (fcp->c_linkcount != 1) {
+                       error = EMLINK;
+                       goto out;
+               }
+
+               if (fcp->c_rsrc_vp && (fcp->c_rsrc_vp->v_usecount > 0 ||
+                                      fcp->c_rsrc_vp->v_iocount > 0)) {
+                       /* Resource fork is in use (including O_EVTONLY) */
+                       error = EBUSY;
+                       goto out;
+               }
+               if (fcp->c_vp && (fcp->c_vp->v_usecount > (fcp->c_rsrc_vp ? 1 : 0) ||
+                                 fcp->c_vp->v_iocount > 1)) {
+                       /*
+                        * Data fork is in use, including O_EVTONLY, but not
+                        * including a reference from the resource fork.
+                        */
+                       error = EBUSY;
+                       goto out;
+               }
+       }
+#endif
+    
        bzero(&from_desc, sizeof(from_desc));
        from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr;
        from_desc.cd_namelen = fcnp->cn_namelen;
@@ -4843,29 +5183,30 @@ skip_rm:
                }
                tdcp->c_entries++;
                tdcp->c_dirchangecnt++;
-               {
-                       struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)tdcp->c_finderinfo + 16);
-                       extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-               }
+               tdcp->c_flag |= C_MODIFIED;
+               hfs_incr_gencount(tdcp);
+
                if (fdcp->c_entries > 0)
                        fdcp->c_entries--;
                fdcp->c_dirchangecnt++;
+               fdcp->c_flag |= C_MODIFIED;
                fdcp->c_touch_chgtime = TRUE;
                fdcp->c_touch_modtime = TRUE;
 
-               fdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
+               if (ISSET(fcp->c_flag, C_HARDLINK)) {
+                       hfs_relorigin(fcp, fdcp->c_fileid);
+                       if (fdcp->c_fileid != fdcp->c_cnid)
+                               hfs_relorigin(fcp, fdcp->c_cnid);
+               }
+
                (void) hfs_update(fdvp, 0);
        }
-       {       
-               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)fdcp->c_finderinfo + 16);
-               extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-       }
+       hfs_incr_gencount(fdcp);
                
        tdcp->c_childhint = out_desc.cd_hint;   /* Cache directory's location */
        tdcp->c_touch_chgtime = TRUE;
        tdcp->c_touch_modtime = TRUE;
 
-       tdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
        (void) hfs_update(tdvp, 0);
 
        /* Update the vnode's name now that the rename has completed. */
@@ -4917,12 +5258,12 @@ out:
            wakeup((caddr_t)&tdcp->c_flag);
        }
 
+       hfs_unlockfour(fdcp, fcp, tdcp, tcp);
+
        if (took_trunc_lock) {
                hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT);       
        }
 
-       hfs_unlockfour(fdcp, fcp, tdcp, tcp);
-       
        /* Now vnode_put the resource forks vnodes if necessary */
        if (tvp_rsrc) {
                vnode_put(tvp_rsrc);
@@ -4933,13 +5274,8 @@ out:
        if (error && tvp_deleted)
                error = EIO;
 
-       if (emit_rename && is_tracked) {
-               if (error) {
-                       resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT);
-               } else {
-                       resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_SUCCESS_OP | NAMESPACE_HANDLER_TRACK_EVENT);
-               }
-       }
+       /* If we want to reintroduce notifications for renames, this is the
+          place to do it. */
 
        return (error);
 }
@@ -5020,11 +5356,10 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
         * Allocate space for the link.
         *
-        * Since we're already inside a transaction,
-        * tell hfs_truncate to skip the ubc_setsize.
         *
         * Don't need truncate lock since a symlink is treated as a system file.
         */
-       error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, 0, ap->a_context);
+       error = hfs_truncate(vp, len, IO_NOZEROFILL, 0, ap->a_context);
 
        /* On errors, remove the symlink file */
        if (error) {
@@ -5065,14 +5400,6 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
        } else {
                buf_bawrite(bp);
        }
-       /*
-        * We defered the ubc_setsize for hfs_truncate
-        * since we were inside a transaction.
-        *
-        * We don't need to drop the cnode lock here
-        * since this is a symlink.
-        */
-       ubc_setsize(vp, len);
 out:
        if (started_tr)
            hfs_end_transaction(hfsmp);
@@ -5172,6 +5499,7 @@ hfs_vnop_readdir(ap)
        int extended;
        int nfs_cookies;
        cnid_t cnid_hint = 0;
+       int bump_valence = 0;
 
        items = 0;
        startoffset = offset = uio_offset(uio);
@@ -5264,14 +5592,13 @@ hfs_vnop_readdir(ap)
                offset += 2;
        }
 
-       /* If there are no real entries then we're done. */
-       if (cp->c_entries == 0) {
-               error = 0;
-               eofflag = 1;
-               uio_setoffset(uio, offset);
-               goto seekoffcalc;
-       }
-
+       /* 
+        * Intentionally avoid checking the valence here.  If we
+        * have FS corruption that reports the valence is 0, even though it
+        * has contents, we might artificially skip over iterating 
+        * this directory.
+        */
+       
        //
        // We have to lock the user's buffer here so that we won't
        // fault on it after we've acquired a shared lock on the
@@ -5373,6 +5700,31 @@ hfs_vnop_readdir(ap)
        if (items >= (int)cp->c_entries) {
                eofflag = 1;
        }
+       
+       /* 
+        * Detect valence FS corruption.
+        *
+        * We are holding the cnode lock exclusive, so there should not be
+        * anybody modifying the valence field of this cnode.  If we enter
+        * this block, that means we observed filesystem corruption, because
+        * this directory reported a valence of 0, yet we found at least one
+        * item.  In this case, we need to minimally self-heal this
+        * directory to prevent userland from tripping over a directory
+        * that appears empty (getattr of valence reports 0), but actually 
+        * has contents. 
+        *
+        * We'll force the cnode update at the end of the function after 
+        * completing all of the normal getdirentries steps.
+        */ 
+       if ((cp->c_entries == 0) && (items > 0)) {
+               /* disk corruption */
+               cp->c_entries++;
+               /* Mark the cnode as dirty. */
+               cp->c_flag |= C_MODIFIED;
+               printf("hfs_vnop_readdir: repairing valence to non-zero! \n");
+               bump_valence++;
+       }
+
 
        /* Convert catalog directory index back into an offset. */
        while (tag == 0)
@@ -5406,7 +5758,14 @@ out:
        if (dirhint == &localhint) {
                cat_releasedesc(&localhint.dh_desc);
        }
+
+       if (bump_valence) {
+               /* force the update before dropping the cnode lock*/
+               hfs_update(vp, 0);
+       }
+
        hfs_unlock(cp);
+       
        return (error);
 }
 
@@ -5583,22 +5942,78 @@ hfs_vnop_pathconf(ap)
        return (0);
 }
 
+/*
+ * Prepares a fork for cat_update by making sure ff_size and ff_blocks
+ * are no bigger than the valid data on disk, thus reducing the chance
+ * of exposing uninitialised data in the event of a non-clean unmount.
+ * cf_buf is where to put the temporary copy if required.  (It can be
+ * inside the fork itself, i.e. &ff->ff_data.)
+ */
+const struct cat_fork *
+hfs_prepare_fork_for_update(filefork_t *ff,
+                                                       const struct cat_fork *cf,
+                                                       struct cat_fork *cf_buf,
+                                                       uint32_t block_size)
+{
+       // No such fork on this cnode: nothing to write out.
+       if (!ff)
+               return NULL;
+
+       // Default both the source and the scratch buffer to the fork's live
+       // catalog data when the caller didn't supply them.
+       if (!cf)
+               cf = &ff->ff_data;
+       if (!cf_buf)
+               cf_buf = &ff->ff_data;
+
+       off_t max_size = ff->ff_size;
+
+       // Check first invalid range: on-disk EOF must not extend into data
+       // that has not been written (zero-filled) yet.
+       if (!TAILQ_EMPTY(&ff->ff_invalidranges))
+               max_size = TAILQ_FIRST(&ff->ff_invalidranges)->rl_start;
+
+       // No borrowed (delayed-allocation) blocks and no invalid range before
+       // EOF: the fork data can go to cat_update unmodified.
+       if (!ff->ff_unallocblocks && ff->ff_size <= max_size)
+               return cf; // Nothing to do
+
+       // Borrowed blocks are a subset of ff_blocks by construction; anything
+       // else indicates in-memory corruption.
+       if (ff->ff_blocks < ff->ff_unallocblocks) {
+               panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+                         ff->ff_blocks, ff->ff_unallocblocks);
+       }
+
+       struct cat_fork *out = cf_buf;
+
+       // Work on a copy when the scratch buffer differs from the source, so
+       // the caller's (possibly live) fork data is left untouched.
+       if (out != cf)
+               bcopy(cf, out, sizeof(*cf));
+
+       // Adjust cf_blocks for cf_vblocks: only blocks actually marked
+       // allocated in the bitmap should be reflected on disk.
+       out->cf_blocks -= out->cf_vblocks;
+
+       /*
+        * Here we trim the size with the updated cf_blocks.  This is
+        * probably unnecessary now because the invalid ranges should
+        * catch this (but that wasn't always the case).
+        */
+       off_t alloc_bytes = hfs_blk_to_bytes(out->cf_blocks, block_size);
+       if (out->cf_size > alloc_bytes)
+               out->cf_size = alloc_bytes;
+
+       // Trim cf_size to first invalid range
+       if (out->cf_size > max_size)
+               out->cf_size = max_size;
+
+       return out;
+}
 
 /*
  * Update a cnode's on-disk metadata.
  *
- * If waitfor is set, then wait for the disk write of
- * the node to complete.
- *
- * The cnode must be locked exclusive
+ * The cnode must be locked exclusive.  See declaration for possible
+ * options.
  */
 int
-hfs_update(struct vnode *vp, __unused int waitfor)
+hfs_update(struct vnode *vp, int options)
 {
        struct cnode *cp = VTOC(vp);
        struct proc *p;
-       struct cat_fork *dataforkp = NULL;
-       struct cat_fork *rsrcforkp = NULL;
+       const struct cat_fork *dataforkp = NULL;
+       const struct cat_fork *rsrcforkp = NULL;
        struct cat_fork datafork;
        struct cat_fork rsrcfork;
        struct hfsmount *hfsmp;
@@ -5606,6 +6021,9 @@ hfs_update(struct vnode *vp, __unused int waitfor)
        int error;
        uint32_t tstate = 0;
 
+       if (ISSET(cp->c_flag, C_NOEXISTS))
+               return 0;
+
        p = current_proc();
        hfsmp = VTOHFS(vp);
 
@@ -5614,14 +6032,14 @@ hfs_update(struct vnode *vp, __unused int waitfor)
                return (0);
        }
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) {
-               cp->c_flag &= ~C_MODIFIED;
+               CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED);
                cp->c_touch_acctime = 0;
                cp->c_touch_chgtime = 0;
                cp->c_touch_modtime = 0;
                return (0);
        }
        if (kdebug_enable) {
-               if (cp->c_touch_acctime)
+               if (cp->c_touch_acctime || cp->c_atime != cp->c_attr.ca_atimeondisk)
                        tstate |= DBG_HFS_UPDATE_ACCTIME;
                if (cp->c_touch_modtime)
                        tstate |= DBG_HFS_UPDATE_MODTIME;
@@ -5630,109 +6048,65 @@ hfs_update(struct vnode *vp, __unused int waitfor)
 
                if (cp->c_flag & C_MODIFIED)
                        tstate |= DBG_HFS_UPDATE_MODIFIED;
-               if (cp->c_flag & C_FORCEUPDATE)
+               if (ISSET(options, HFS_UPDATE_FORCE))
                        tstate |= DBG_HFS_UPDATE_FORCE;
                if (cp->c_flag & C_NEEDS_DATEADDED)
                        tstate |= DBG_HFS_UPDATE_DATEADDED;
+               if (cp->c_flag & C_MINOR_MOD)
+                       tstate |= DBG_HFS_UPDATE_MINOR;
        }
        hfs_touchtimes(hfsmp, cp);
 
-       /* Nothing to update. */
-       if ((cp->c_flag & (C_MODIFIED | C_FORCEUPDATE)) == 0) {
-               return (0);
+       if (!ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD)
+               && !hfs_should_save_atime(cp)) {
+               // Nothing to update
+               return 0;
        }
-       
-       if (cp->c_datafork)
-               dataforkp = &cp->c_datafork->ff_data;
-       if (cp->c_rsrcfork)
-               rsrcforkp = &cp->c_rsrcfork->ff_data;
 
-       /*
-        * For delayed allocations updates are
-        * postponed until an fsync or the file
-        * gets written to disk.
-        *
-        * Deleted files can defer meta data updates until inactive.
-        *
-        * If we're ever called with the C_FORCEUPDATE flag though
-        * we have to do the update.
-        */
-       if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 &&
-           (ISSET(cp->c_flag, C_DELETED) || 
-           (dataforkp && cp->c_datafork->ff_unallocblocks) ||
-           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) {
-       //      cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
-               cp->c_flag |= C_MODIFIED;
+       KDBG(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate);
 
-               return (0);
-       }
+       bool check_txn = false;
 
-       KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_START, vp, tstate, 0, 0, 0);
+       if (!ISSET(options, HFS_UPDATE_FORCE) && !ISSET(cp->c_flag, C_MODIFIED)) {
+               /*
+                * This must be a minor modification.  If the current
+                * transaction already has an update for this node, then we
+                * bundle in the modification.
+                */
+               if (hfsmp->jnl
+                       && journal_current_txn(hfsmp->jnl) == cp->c_update_txn) {
+                       check_txn = true;
+               } else {
+                       tstate |= DBG_HFS_UPDATE_SKIPPED;
+                       error = 0;
+                       goto exit;
+               }
+       }
 
-       if ((error = hfs_start_transaction(hfsmp)) != 0) {
+       if ((error = hfs_start_transaction(hfsmp)) != 0)
+               goto exit;
 
-           KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, -1, 0);
-           return error;
+       if (check_txn
+               && journal_current_txn(hfsmp->jnl) != cp->c_update_txn) {
+               hfs_end_transaction(hfsmp);
+               tstate |= DBG_HFS_UPDATE_SKIPPED;
+               error = 0;
+               goto exit;
        }
 
-    /* 
+       if (cp->c_datafork)
+               dataforkp = &cp->c_datafork->ff_data;
+       if (cp->c_rsrcfork)
+               rsrcforkp = &cp->c_rsrcfork->ff_data;
+
+    /*
      * Modify the values passed to cat_update based on whether or not
      * the file has invalid ranges or borrowed blocks.
      */
-    if (dataforkp) {
-        off_t numbytes = 0;
-
-        /* copy the datafork into a temporary copy so we don't pollute the cnode's */
-        bcopy(dataforkp, &datafork, sizeof(datafork));
-        dataforkp = &datafork;
-
-        /*
-         * If there are borrowed blocks, ensure that they are subtracted
-         * from the total block count before writing the cnode entry to disk.
-         * Only extents that have actually been marked allocated in the bitmap
-         * should be reflected in the total block count for this fork.
-         */
-        if (cp->c_datafork->ff_unallocblocks != 0) {
-            // make sure that we don't assign a negative block count
-            if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
-                panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
-                        cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
-            }
-
-            /* Also cap the LEOF to the total number of bytes that are allocated. */
-            datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
-            datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
-        }
-
-        /*
-         * For files with invalid ranges (holes) the on-disk
-         * field representing the size of the file (cf_size)
-         * must be no larger than the start of the first hole.
-         * However, note that if the first invalid range exists
-         * solely within borrowed blocks, then our LEOF and block
-         * count should both be zero.  As a result, set it to the 
-         * min of the current cf_size and the start of the first 
-         * invalid range, because it may have already been reduced
-         * to zero by the borrowed blocks check above.
-         */
-        if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges))  {
-            numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
-            datafork.cf_size = MIN((numbytes), (datafork.cf_size));
-        }
-    }
-       
-       /*
-        * For resource forks with delayed allocations, make sure
-        * the block count and file size match the number of blocks
-        * actually allocated to the file on disk.
-        */
-       if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) {
-               bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork));
-               rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks);
-               rsrcfork.cf_size   = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
-               rsrcforkp = &rsrcfork;
-       }
-       if (kdebug_enable) {
+    dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, NULL, &datafork, hfsmp->blockSize);
+       rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, NULL, &rsrcfork, hfsmp->blockSize);
+
+       if (__improbable(kdebug_enable & KDEBUG_TRACE)) {
                long dbg_parms[NUMPARMS];
                int  dbg_namelen;
 
@@ -5750,24 +6124,26 @@ hfs_update(struct vnode *vp, __unused int waitfor)
         */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
 
-       /* XXX - waitfor is not enforced */
        error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp);
 
+       if (hfsmp->jnl)
+               cp->c_update_txn = journal_current_txn(hfsmp->jnl);
+
        hfs_systemfile_unlock(hfsmp, lockflags);
 
-       /* After the updates are finished, clear the flags */
-       cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+       CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD);
 
        hfs_end_transaction(hfsmp);
 
-       KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, 0, 0);
+exit:
+
+       KDBG(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error);
 
-       return (error);
+       return error;
 }
 
 /*
  * Allocate a new node
- * Note - Function does not create and return a vnode for whiteout creation.
  */
 int
 hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
@@ -5792,8 +6168,22 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 #if CONFIG_PROTECT
        struct cprotect *entry = NULL;
        int32_t cp_class = -1;
+
+       /* 
+        * By default, it's OK for AKS to override our target class preferences.
+        */
+       uint32_t keywrap_flags = CP_KEYWRAP_DIFFCLASS;
+
        if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
                cp_class = (int32_t)vap->va_dataprotect_class;
+               /* 
+                * Since the user specifically requested this target class be used,
+                * we want to fail this creation operation if we cannot wrap to their 
+                * target class. The CP_KEYWRAP_DIFFCLASS bit says that it is OK to
+                * use a different class than the one specified, so we turn that off
+                * now.
+                */
+               keywrap_flags &= ~CP_KEYWRAP_DIFFCLASS;
        }
        int protected_mount = 0;        
 #endif
@@ -5933,8 +6323,6 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                fip->fdType    = SWAP_BE32(kSymLinkFileType);
                fip->fdCreator = SWAP_BE32(kSymLinkCreator);
        }
-       if (cnp->cn_flags & ISWHITEOUT)
-               attr.ca_flags |= UF_OPAQUE;
 
        /* Setup the descriptor */
        in_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
@@ -5989,21 +6377,17 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                /* Update the parent directory */
                dcp->c_childhint = out_desc.cd_hint;    /* Cache directory's location */
                dcp->c_entries++;
-               {
-                       struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16);
-                       extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-               }
+
                if (vnodetype == VDIR) {
                        INC_FOLDERCOUNT(hfsmp, dcp->c_attr);
                }
                dcp->c_dirchangecnt++;
-               {       
-                       struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16);
-                       extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1);
-               }
-               dcp->c_ctime = tv.tv_sec;
-               dcp->c_mtime = tv.tv_sec;
-               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+               hfs_incr_gencount(dcp);
+
+               dcp->c_touch_chgtime = dcp->c_touch_modtime = true;
+               dcp->c_flag |= C_MODIFIED;
+
+               hfs_update(dcp->c_vp, 0);
 
 #if CONFIG_PROTECT
                /*
@@ -6042,7 +6426,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        hfs_systemfile_unlock(hfsmp, lockflags);
        if (error)
                goto exit;
-       
+
+       uint32_t txn = hfsmp->jnl ? journal_current_txn(hfsmp->jnl) : 0;
+
        /* Invalidate negative cache entries in the directory */
        if (dcp->c_flag & C_NEG_ENTRIES) {
                cache_purge_negatives(dvp);
@@ -6076,16 +6462,10 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
         */
 
        if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target))  {
-               cp_entry_destroy (entry);
+               cp_entry_destroy (hfsmp, entry);
                entry = NULL;
        }
 #endif
-
-       /* Do not create vnode for whiteouts */
-       if (S_ISWHT(mode)) {
-               goto exit;
-       }       
-
        gnv_flags |= GNV_CREATE;
        if (nocache) {
                gnv_flags |= GNV_NOCACHE;
@@ -6114,20 +6494,21 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 
        cp = VTOC(tvp);
 
+       cp->c_update_txn = txn;
+
        struct  doc_tombstone *ut;
        ut = get_uthread_doc_tombstone();
        if (   ut->t_lastop_document_id != 0 
            && ut->t_lastop_parent == dvp
            && ut->t_lastop_parent_vid == vnode_vid(dvp)
-           && strcmp((char *)ut->t_lastop_filename, (char *)cp->c_desc.cd_nameptr) == 0) {
+           && strcmp((char *)ut->t_lastop_filename, (const char *)cp->c_desc.cd_nameptr) == 0) {
                struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16);
 
                //printf("CREATE: preserving doc-id %lld on %s\n", ut->t_lastop_document_id, ut->t_lastop_filename);
                fip->document_id = (uint32_t)(ut->t_lastop_document_id & 0xffffffff);
 
                cp->c_bsdflags |= UF_TRACKED;
-               // mark the cnode dirty
-               cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+               cp->c_flag |= C_MODIFIED;
 
                if ((error = hfs_start_transaction(hfsmp)) == 0) {
                        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
@@ -6154,6 +6535,36 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                }
        }
 
+       if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) && !vnode_isautocandidate(dvp))) {
+
+               //printf("hfs: flagging %s (fileid: %d) as VFASTDEVCANDIDATE (dvp name: %s)\n",
+               //       cnp->cn_nameptr ? cnp->cn_nameptr : "<NONAME>",
+               //       cp->c_fileid,
+               //       dvp->v_name ? dvp->v_name : "no-dir-name");
+
+               //
+               // On new files we set the FastDevCandidate flag so that
+               // any new blocks allocated to it will be pinned.
+               //
+               cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
+               vnode_setfastdevicecandidate(tvp);
+
+               //
+               // properly inherit auto-cached flags
+               //
+               if (vnode_isautocandidate(dvp)) {
+                       cp->c_attr.ca_recflags |= kHFSAutoCandidateMask;
+                       vnode_setautocandidate(tvp);
+               }
+
+
+               //
+               // We also want to add it to the hotfile adoption list so
+               // that it will eventually land in the hotfile btree
+               //
+               (void) hfs_addhotfile(tvp);
+       }
+       
        *vpp = tvp;
 
 #if CONFIG_PROTECT
@@ -6171,13 +6582,13 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                        panic ("hfs_makenode: no cpentry for cnode (%p)", cp);
                }
 
-               error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &keyed_entry);
+               error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), keywrap_flags, &keyed_entry);
                if (error == 0) {
                        /* 
                         * Upon success, the keys were generated and written out. 
                         * Update the cp pointer in the cnode.
                         */
-                       cp_replace_entry (cp, keyed_entry);
+                       cp_replace_entry (hfsmp, cp, keyed_entry);
                        if (nocache) {
                                cache_enter (dvp, tvp, cnp);
                        }
@@ -6248,7 +6659,7 @@ exit:
         * out the pointer if it was called already.
         */
        if (entry) {
-               cp_entry_destroy (entry);
+               cp_entry_destroy (hfsmp, entry);
                entry = NULL;
        }       
 #endif
@@ -6275,108 +6686,52 @@ exit:
 
 
 /*
- * hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is
- * found in 'vp'.  The rsrc fork vnode is returned with the cnode locked and iocount
- * on the rsrc vnode.
- * 
- * *rvpp is an output argument for returning the pointer to the resource fork vnode.
- * In most cases, the resource fork vnode will not be set if we return an error. 
- * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode
- * before we discover the error (the file has gone open-unlinked).  In this case only,
- * we may return a vnode in the output argument despite an error.
- * 
- * If can_drop_lock is set, then it is safe for this function to temporarily drop
- * and then re-acquire the cnode lock.  We may need to do this, for example, in order to 
- * acquire an iocount or promote our lock.  
+ * hfs_vgetrsrc acquires a resource fork vnode corresponding to the
+ * cnode that is found in 'vp'.  The cnode should be locked upon entry
+ * and will be returned locked, but it may be dropped temporarily.
+ *
+ * If the resource fork vnode does not exist, HFS will attempt to acquire an
+ * empty (uninitialized) vnode from VFS so as to avoid deadlocks with
+ * jetsam. If we let the normal getnewvnode code produce the vnode for us
+ * we would be doing so while holding the cnode lock of our cnode.
  * 
- * error_on_unlinked is an argument which indicates that we are to return an error if we 
- * discover that the cnode has gone into an open-unlinked state ( C_DELETED or C_NOEXISTS)
- * is set in the cnode flags.  This is only necessary if can_drop_lock is true, otherwise 
- * there's really no reason to double-check for errors on the cnode.
+ * On success, *rvpp will hold the resource fork vnode with an
+ * iocount.  *Don't* forget the vnode_put.
  */
-
 int
-hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, 
-               int can_drop_lock, int error_on_unlinked)
+hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp)
 {
-       struct vnode *rvp;
+       struct vnode *rvp = NULLVP;
+       struct vnode *empty_rvp = NULLVP;
        struct vnode *dvp = NULLVP;
        struct cnode *cp = VTOC(vp);
        int error;
        int vid;
-       int delete_status = 0;
 
        if (vnode_vtype(vp) == VDIR) {
                return EINVAL;
        }
        
-       /*
-        * Need to check the status of the cnode to validate it hasn't gone 
-        * open-unlinked on us before we can actually do work with it.
-        */
-       delete_status = hfs_checkdeleted(cp);
-       if ((delete_status) && (error_on_unlinked)) {
-               return delete_status;
-       }
-
 restart:
        /* Attempt to use existing vnode */
        if ((rvp = cp->c_rsrc_vp)) {
-               vid = vnode_vid(rvp);
+               vid = vnode_vid(rvp);
 
-               /*
-                * It is not safe to hold the cnode lock when calling vnode_getwithvid()
-                * for the alternate fork -- vnode_getwithvid() could deadlock waiting
-                * for a VL_WANTTERM while another thread has an iocount on the alternate
-                * fork vnode and is attempting to acquire the common cnode lock.
-                *
-                * But it's also not safe to drop the cnode lock when we're holding
-                * multiple cnode locks, like during a hfs_removefile() operation
-                * since we could lock out of order when re-acquiring the cnode lock.
-                *
-                * So we can only drop the lock here if its safe to drop it -- which is
-                * most of the time with the exception being hfs_removefile().
-                */
-               if (can_drop_lock)
-                       hfs_unlock(cp);
+               // vnode_getwithvid can block so we need to drop the cnode lock
+               hfs_unlock(cp);
 
                error = vnode_getwithvid(rvp, vid);
 
-               if (can_drop_lock) {
-                       (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+               hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
 
-                       /*
-                        * When we relinquished our cnode lock, the cnode could have raced
-                        * with a delete and gotten deleted.  If the caller did not want
-                        * us to ignore open-unlinked files, then re-check the C_DELETED
-                        * state and see if we need to return an ENOENT here because the item
-                        * got deleted in the intervening time.
-                        */
-                       if (error_on_unlinked) {
-                               if ((delete_status = hfs_checkdeleted(cp))) {
-                                       /* 
-                                        * If error == 0, this means that we succeeded in acquiring an iocount on the 
-                                        * rsrc fork vnode.  However, if we're in this block of code, that means that we noticed
-                                        * that the cnode has gone open-unlinked.  In this case, the caller requested that we
-                                        * not do any other work and return an errno.  The caller will be responsible for
-                                        * dropping the iocount we just acquired because we can't do it until we've released
-                                        * the cnode lock.  
-                                        */
-                                       if (error == 0) {
-                                               *rvpp = rvp;
-                                       }
-                                       return delete_status;
-                               }
-                       }
+               /*
+                * When our lock was relinquished, the resource fork
+                * could have been recycled.  Check for this and try
+                * again.
+                */
+               if (error == ENOENT)
+                       goto restart;
 
-                       /*
-                        * When our lock was relinquished, the resource fork
-                        * could have been recycled.  Check for this and try
-                        * again.
-                        */
-                       if (error == ENOENT)
-                               goto restart;
-               }
                if (error) {
                        const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr;
 
@@ -6393,42 +6748,57 @@ restart:
                char delname[32];
                int lockflags;
                int newvnode_flags = 0;
-                       
-               /*
-                * Make sure cnode lock is exclusive, if not upgrade it.
+
+               /* 
+                * In this case, we don't currently see a resource fork vnode attached
+                * to this cnode.  In most cases, we were called from a read-only VNOP
+                * like getattr, so it should be safe to drop the cnode lock and then 
+                * re-acquire it.  
+                *
+                * Here, we drop the lock so that we can acquire an empty/husk
+                * vnode so that we don't deadlock against jetsam.  
                 *
-                * We assume that we were called from a read-only VNOP (getattr)
-                * and that its safe to have the cnode lock dropped and reacquired.
+                * It does not currently appear possible to hold the truncate lock via
+                * FS re-entrancy when we get to this point. (8/2014)
                 */
-               if (cp->c_lockowner != current_thread()) {
-                       if (!can_drop_lock) {                           
-                               return (EINVAL);
-                       }
-                       /*
-                        * If the upgrade fails we lose the lock and
-                        * have to take the exclusive lock on our own.
-                        */
-                       if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
-                               lck_rw_lock_exclusive(&cp->c_rwlock);
-                       cp->c_lockowner = current_thread();
+               hfs_unlock (cp);
+
+               error = vnode_create_empty (&empty_rvp); 
+
+               hfs_lock_always (cp, HFS_EXCLUSIVE_LOCK);
+
+               if (error) {
+                       /* If acquiring the 'empty' vnode failed, then nothing to clean up */
+                       return error;
                }
 
+               /* 
+                * We could have raced with another thread here while we dropped our cnode
+                * lock.  See if the cnode now has a resource fork vnode and restart if appropriate.
+                *
+                * Note: We just released the cnode lock, so there is a possibility that the
+                * cnode that we just acquired has been deleted or even removed from disk
+                * completely, though this is unlikely. If the file is open-unlinked, the 
+                * check below will resolve it for us.  If it has been completely 
+                * removed (even from the catalog!), then when we examine the catalog 
+                * directly, below, while holding the catalog lock, we will not find the
+                * item and we can fail out properly.
+                */
+               if (cp->c_rsrc_vp) {
+                       /* Drop the empty vnode before restarting */
+                       vnode_put (empty_rvp);
+                       empty_rvp = NULL;
+                       rvp = NULL;
+                       goto restart;
+               }
+       
                /*
                 * hfs_vgetsrc may be invoked for a cnode that has already been marked
                 * C_DELETED.  This is because we need to continue to provide rsrc
                 * fork access to open-unlinked files.  In this case, build a fake descriptor
                 * like in hfs_removefile.  If we don't do this, buildkey will fail in
-                * cat_lookup because this cnode has no name in its descriptor. However,
-                * only do this if the caller did not specify that they wanted us to
-                * error out upon encountering open-unlinked files.
+                * cat_lookup because this cnode has no name in its descriptor.
                 */
-
-               if ((error_on_unlinked) && (can_drop_lock)) {
-                       if ((error = hfs_checkdeleted(cp))) { 
-                               return error;
-                       }
-               }
-
                if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) {
                        bzero (&to_desc, sizeof(to_desc));
                        bzero (delname, 32);
@@ -6504,6 +6874,8 @@ restart:
 
                hfs_systemfile_unlock(hfsmp, lockflags);
                if (error) {
+                       /* Drop our 'empty' vnode ! */
+                       vnode_put (empty_rvp);
                        return (error);
                }
                /*
@@ -6522,18 +6894,36 @@ restart:
                        cn.cn_namelen = snprintf(cn.cn_nameptr, MAXPATHLEN,
                                                 "%s%s", descptr->cd_nameptr,
                                                 _PATH_RSRCFORKSPEC);
+                       // Should never happen because cn.cn_nameptr won't ever be long...
+                       if (cn.cn_namelen >= MAXPATHLEN) {
+                               FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
+                               /* Drop our 'empty' vnode ! */
+                               vnode_put (empty_rvp);
+                               return ENAMETOOLONG;
+                               
+                       }
                }
                dvp = vnode_getparent(vp);
+               
+               /* 
+                * We are about to call hfs_getnewvnode and pass in the vnode that we acquired
+                * earlier when we were not holding any locks. The semantics of GNV_USE_VP require that
+                * either hfs_getnewvnode consume the vnode and vend it back to us, properly initialized,
+                * or it will consume/dispose of it properly if it errors out.
+                */ 
+               rvp = empty_rvp;
+               
                error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL,
-                                       descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr,
-                                       &rsrcfork, &rvp, &newvnode_flags);
+                                       descptr, (GNV_WANTRSRC | GNV_SKIPLOCK | GNV_USE_VP), 
+                                                               &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags);
+                       
                if (dvp)
                        vnode_put(dvp);
                if (cn.cn_pnbuf)
                        FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
                if (error)
                        return (error);
-       }
+       }  /* End 'else' for rsrc fork not existing */
 
        *rvpp = rvp;
        return (0);
@@ -6680,17 +7070,17 @@ hfsfifo_close(ap)
 static u_int32_t 
 hfs_get_document_id_internal(const uint8_t *finderinfo, mode_t mode)
 {
-       u_int8_t *finfo = NULL;
+       const uint8_t *finfo = NULL;
        u_int32_t doc_id = 0;
        
        /* overlay the FinderInfo to the correct pointer, and advance */
-       finfo = ((uint8_t *)finderinfo) + 16;
+       finfo = finderinfo + 16;
 
        if (S_ISDIR(mode) || S_ISREG(mode)) {
-               struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+               const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
                doc_id = extinfo->document_id;
        } else if (S_ISDIR(mode)) {
-               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)finderinfo + 16);
+               const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo;
                doc_id = extinfo->document_id;
        }       
 
@@ -6736,15 +7126,14 @@ hfs_vnop_fsync(ap)
                return 0;               
        }
 
-#if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
-               return (error);
-       }
-#endif /* CONFIG_PROTECT */
-
        /*
-        * We need to allow ENOENT lock errors since unlink
-        * systenm call can call VNOP_FSYNC during vclean.
+        * No need to call cp_handle_vnop to resolve fsync().  Any dirty data
+        * should have caused the keys to be unwrapped at the time the data was
+        * put into the UBC, either at mmap/pagein/read-write.  If we did manage
+        * to let this by, then strategy will auto-resolve for us.
+        * 
+        * We also need to allow ENOENT lock errors since unlink
+        * system call can call VNOP_FSYNC during vclean.
         */
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error)
@@ -6756,74 +7145,6 @@ hfs_vnop_fsync(ap)
        return (error);
 }
 
-
-int
-hfs_vnop_whiteout(ap) 
-       struct vnop_whiteout_args /* {
-               struct vnode *a_dvp;
-               struct componentname *a_cnp;
-               int a_flags;
-               vfs_context_t a_context;
-       } */ *ap;
-{
-       int error = 0;
-       struct vnode *vp = NULL;
-       struct vnode_attr va;
-       struct vnop_lookup_args lookup_args;
-       struct vnop_remove_args remove_args;
-       struct hfsmount *hfsmp;
-
-       hfsmp = VTOHFS(ap->a_dvp);
-       if (hfsmp->hfs_flags & HFS_STANDARD) {
-               error = ENOTSUP;
-               goto exit;
-       }
-
-       switch (ap->a_flags) {
-               case LOOKUP:
-                       error = 0;
-                       break;
-
-               case CREATE: 
-                       VATTR_INIT(&va);
-                       VATTR_SET(&va, va_type, VREG);
-                       VATTR_SET(&va, va_mode, S_IFWHT);
-                       VATTR_SET(&va, va_uid, 0);
-                       VATTR_SET(&va, va_gid, 0);
-                       
-                       error = hfs_makenode(ap->a_dvp, &vp, ap->a_cnp, &va, ap->a_context);
-                       /* No need to release the vnode as no vnode is created for whiteouts */
-                       break;
-
-               case DELETE:
-                       lookup_args.a_dvp = ap->a_dvp;
-                       lookup_args.a_vpp = &vp;
-                       lookup_args.a_cnp = ap->a_cnp;
-                       lookup_args.a_context = ap->a_context;
-
-                       error = hfs_vnop_lookup(&lookup_args);
-                       if (error) {
-                               break;
-                       }
-                       
-                       remove_args.a_dvp = ap->a_dvp;
-                       remove_args.a_vp = vp;
-                       remove_args.a_cnp = ap->a_cnp;
-                       remove_args.a_flags = 0;
-                       remove_args.a_context = ap->a_context;
-
-                       error = hfs_vnop_remove(&remove_args);
-                       vnode_put(vp);
-                       break;
-
-               default:
-                       panic("hfs_vnop_whiteout: unknown operation (flag = %x)\n", ap->a_flags);
-       };
-       
-exit:
-       return (error);
-}
-
 int (**hfs_vnodeop_p)(void *);
 
 #define VOPFUNC int (*)(void *)
@@ -6886,12 +7207,12 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = {
     { &vnop_setxattr_desc, (VOPFUNC)hfs_readonly_op},         /* set xattr (READONLY) */
     { &vnop_removexattr_desc, (VOPFUNC)hfs_readonly_op},      /* remove xattr (READONLY) */
     { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr},
-    { &vnop_whiteout_desc, (VOPFUNC)hfs_readonly_op},       /* whiteout (READONLY) */
 #if NAMEDSTREAMS
     { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream },
     { &vnop_makenamedstream_desc, (VOPFUNC)hfs_readonly_op }, 
     { &vnop_removenamedstream_desc, (VOPFUNC)hfs_readonly_op },
 #endif
+    { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk */
     { NULL, (VOPFUNC)NULL }
 };
 
@@ -6948,12 +7269,13 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr},
     { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr},
     { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr},
-    { &vnop_whiteout_desc, (VOPFUNC)hfs_vnop_whiteout},
 #if NAMEDSTREAMS
     { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream },
     { &vnop_makenamedstream_desc, (VOPFUNC)hfs_vnop_makenamedstream },
     { &vnop_removenamedstream_desc, (VOPFUNC)hfs_vnop_removenamedstream },
 #endif
+    { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk */
+       { &vnop_mnomap_desc, (VOPFUNC)hfs_vnop_mnomap },
     { NULL, (VOPFUNC)NULL }
 };