/*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
int flush_cache_on_write = 0;
-SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
-
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
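+/* The flag is tunable at run time through the kern.flush_cache_on_write sysctl;
+ * CTLFLAG_LOCKED is taken here to mean that this plain integer needs no extra
+ * serialization from the sysctl layer (assumed meaning of the flag).
+ */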
/*
* Read data from a file.
off_t start_resid = uio_resid(uio);
off_t offset = uio_offset(uio);
int retval = 0;
+ int took_truncate_lock = 0;
/* Preflight checks */
if (!vnode_isreg(vp)) {
}
/* otherwise the file was converted back to a regular file while we were reading it */
retval = 0;
+ } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+ int error;
+
+ error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
+ if (error) {
+ return error;
+ }
+
}
}
#endif /* HFS_COMPRESSION */
fp = VTOF(vp);
hfsmp = VTOHFS(vp);
+#if CONFIG_PROTECT
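+	/*
+	 * Content protection: make sure this thread is entitled to read the
+	 * file's protected data before doing any work; any error from
+	 * cp_handle_vnop is passed straight back to the caller.
+	 */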
+ if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {
+ goto exit;
+ }
+#endif
+
/* Protect against a size change. */
- hfs_lock_truncate(cp, 0);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+ took_truncate_lock = 1;
filesize = fp->ff_size;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
hfs_unlock(cp);
}
exit:
- hfs_unlock_truncate(cp, 0);
+ if (took_truncate_lock) {
+ hfs_unlock_truncate(cp, 0);
+ }
+
return (retval);
}
int lockflags;
int cnode_locked = 0;
int partialwrite = 0;
- int exclusive_lock = 0;
+ int do_snapshot = 1;
+ time_t orig_ctime=VTOC(vp)->c_ctime;
+ int took_truncate_lock = 0;
+ struct rl_entry *invalid_range;
#if HFS_COMPRESSION
if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
case FILE_IS_COMPRESSED:
return EACCES;
case FILE_IS_CONVERTING:
- /* if FILE_IS_CONVERTING, we allow writes */
+ /* if FILE_IS_CONVERTING, we allow writes but do not
+ bother with snapshots or else we will deadlock.
+ */
+ do_snapshot = 0;
break;
default:
printf("invalid state %d for compressed file\n", state);
/* fall through */
}
+ } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+ int error;
+
+ error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
+ if (error != 0) {
+ return error;
+ }
+ }
+
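+	/*
+	 * Notify any namespace handler tracking this file (e.g. a backup or
+	 * versioning agent - an assumption based on the handler naming) that a
+	 * write is about to happen, unless do_snapshot was cleared above to
+	 * avoid deadlocking against a file that is still being converted.
+	 */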
+ if (do_snapshot) {
+ check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
}
+
#endif
// LP64todo - fix this! uio_resid may be 64-bit value
resid = uio_resid(uio);
offset = uio_offset(uio);
- if (ioflag & IO_APPEND) {
- exclusive_lock = 1;
- }
-
if (offset < 0)
return (EINVAL);
if (resid == 0)
fp = VTOF(vp);
hfsmp = VTOHFS(vp);
+#if CONFIG_PROTECT
+ if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {
+ goto exit;
+ }
+#endif
+
eflags = kEFDeferMask; /* defer file block allocations */
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
/*
* When the underlying device is sparse and space
* is low (< 8MB), stop doing delayed allocations
again:
/* Protect against a size change. */
- hfs_lock_truncate(cp, exclusive_lock);
+ /*
+ * Protect against a size change.
+ *
+ * Note: If took_truncate_lock is true, then we previously got the lock shared
+ * but needed to upgrade to exclusive. So try getting it exclusive from the
+ * start.
+ */
+ if (ioflag & IO_APPEND || took_truncate_lock) {
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+ }
+ else {
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+ }
+ took_truncate_lock = 1;
+ /* Update UIO */
if (ioflag & IO_APPEND) {
uio_setoffset(uio, fp->ff_size);
offset = fp->ff_size;
writelimit = offset + resid;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- /* If the truncate lock is shared, and if we either have virtual
- * blocks or will need to extend the file, upgrade the truncate
- * to exclusive lock. If upgrade fails, we lose the lock and
- * have to get exclusive lock again. Note that we want to
- * grab the truncate lock exclusive even if we're not allocating new blocks
- * because we could still be growing past the LEOF.
+ /*
+ * We may need an exclusive truncate lock for several reasons, all
+ * of which are because we may be writing to a (portion of a) block
+ * for the first time, and we need to make sure no readers see the
+ * prior, uninitialized contents of the block. The cases are:
+ *
+ * 1. We have unallocated (delayed allocation) blocks. We may be
+ * allocating new blocks to the file and writing to them.
+ * (A more precise check would be whether the range we're writing
+ * to contains delayed allocation blocks.)
+ * 2. We need to extend the file. The bytes between the old EOF
+ * and the new EOF are not yet initialized. This is important
+ * even if we're not allocating new blocks to the file. If the
+ * old EOF and new EOF are in the same block, we still need to
+ * protect that range of bytes until they are written for the
+ * first time.
+ * 3. The write overlaps some invalid ranges (delayed zero fill; that
+ * part of the file has been allocated, but not yet written).
+ *
+ * If we had a shared lock with the above cases, we need to try to upgrade
+ * to an exclusive lock. If the upgrade fails, we will lose the shared
+ * lock, and will need to take the truncate lock again; the took_truncate_lock
+ * flag will still be set, causing us to try for an exclusive lock next time.
+ *
+ * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
+ * lock is held, since it protects the range lists.
*/
- if ((exclusive_lock == 0) &&
- ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
- exclusive_lock = 1;
- /* Lock upgrade failed and we lost our shared lock, try again */
+ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
+ ((fp->ff_unallocblocks != 0) ||
+ (writelimit > origFileSize))) {
if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+ /*
+ * Lock upgrade failed and we lost our shared lock, try again.
+ * Note: we do not set took_truncate_lock=0 here. Leaving it
+ * set to 1 will cause us to try to get the lock exclusive.
+ */
goto again;
}
+ else {
+ /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
+ cp->c_truncatelockowner = current_thread();
+ }
}
if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
}
cnode_locked = 1;
- if (!exclusive_lock) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
- (int)offset, uio_resid(uio), (int)fp->ff_size,
- (int)filebytes, 0);
+ /*
+ * Now that we have the cnode lock, see if there are delayed zero fill ranges
+ * overlapping our write. If so, we need the truncate lock exclusive (see above).
+ */
+ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
+ (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
+ /*
+ * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
+ * a deadlock, rather than simply returning failure. (That is, it apparently does
+ * not behave like a "try_lock"). Since this condition is rare, just drop the
+ * cnode lock and try again. Since took_truncate_lock is set, we will
+ * automatically take the truncate lock exclusive.
+ */
+ hfs_unlock(cp);
+ cnode_locked = 0;
+ hfs_unlock_truncate(cp, 0);
+ goto again;
}
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+ (int)offset, uio_resid(uio), (int)fp->ff_size,
+ (int)filebytes, 0);
/* Check if we do not need to extend the file */
if (writelimit <= filebytes) {
off_t inval_end;
off_t io_start;
int lflag;
- struct rl_entry *invalid_range;
if (writelimit > fp->ff_size)
filesize = writelimit;
exit:
if (cnode_locked)
hfs_unlock(cp);
- hfs_unlock_truncate(cp, exclusive_lock);
+
+ if (took_truncate_lock) {
+ hfs_unlock_truncate(cp, 0);
+ }
return (retval);
}
struct cinfo c_info;
/* otherwise, check the cnode hash in case the file/dir is incore */
- if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
+ if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
cnattrp->ca_uid = c_info.uid;
cnattrp->ca_gid = c_info.gid;
cnattrp->ca_mode = c_info.mode;
struct vnode *vp;
/* get the vnode for this cnid */
- myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
+ myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
if ( myErr ) {
myResult = 0;
goto ExitThisRoutine;
}
} else {
unsigned int flags;
-
- myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
- cnattr.ca_mode, hfsmp->hfs_mp,
- myp_ucred, theProcPtr);
+ int mode = cnattr.ca_mode & S_IFMT;
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);
- if (cnattr.ca_mode & S_IFDIR) {
- flags = R_OK | X_OK;
- } else {
- flags = R_OK;
- }
- if ( (myPerms & flags) != flags) {
- myResult = 0;
- myErr = EACCES;
- goto ExitThisRoutine; /* no access */
- }
+ if (mode == S_IFDIR) {
+ flags = R_OK | X_OK;
+ } else {
+ flags = R_OK;
+ }
+ if ( (myPerms & flags) != flags) {
+ myResult = 0;
+ myErr = EACCES;
+ goto ExitThisRoutine; /* no access */
+ }
/* up the hierarchy we go */
thisNodeID = catkey.hfsPlus.parentID;
struct vnode *cvp;
int myErr = 0;
/* get the vnode for this cnid */
- myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
+ myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
if ( myErr ) {
access[i] = myErr;
continue;
is64bit = proc_is64bit(p);
+#if CONFIG_PROTECT
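+	/*
+	 * With content protection enabled, refuse any ioctl on this vnode if
+	 * the caller is not allowed write access to the file's protected data;
+	 * CP_WRITE_ACCESS is demanded even for commands that only read
+	 * (presumably to keep the check conservative).
+	 */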
+ {
+ int error = 0;
+ if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+ return error;
+ }
+ }
+#endif /* CONFIG_PROTECT */
+
switch (ap->a_command) {
case HFS_GETPATH:
if (linkfileid < kHFSFirstUserCatalogNodeID) {
return (EINVAL);
}
- if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
+ if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
return (error);
}
if (ap->a_command == HFS_NEXT_LINK) {
return (error);
}
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
case HFS_SETBACKINGSTOREINFO: {
struct vnode * bsfs_rootvp;
struct vnode * di_vp;
vnode_put(bsfs_rootvp);
hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+
hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+ /* The free extent cache is managed differently for sparse devices.
+ * There is a window between when the volume is mounted and when the
+ * device is marked as sparse, so the free extent cache for this
+ * volume is currently initialized as for a normal volume (sorted by
+ * block count). Reset the cache so that it will be rebuilt for a
+ * sparse device (sorted by start block).
+ */
+ ResetVCBFreeExtCache(hfsmp);
+
hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
hfsmp->hfs_sparsebandblks *= 4;
// note: can't do this after taking the lock as it will
// deadlock against ourselves.
vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
- hfs_global_exclusive_lock_acquire(hfsmp);
+ hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
// DO NOT call hfs_journal_flush() because that takes a
// shared lock on the global exclusive lock!
- journal_flush(hfsmp->jnl);
+ journal_flush(hfsmp->jnl, TRUE);
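+ // (the TRUE argument is assumed to be a wait-for-I/O flag, making the
+ // flush synchronous with respect to the journaled metadata)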
// don't need to iterate on all vnodes, we just need to
// wait for writes to the system files and the device vnode
+ //
+ // Now that journal flush waits for all metadata blocks to
+ // be written out, waiting for btree writes is probably no
+ // longer required.
if (HFSTOVCB(hfsmp)->extentsRefNum)
vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
if (HFSTOVCB(hfsmp)->catalogRefNum)
// code that "thaws" the fs in hfs_vnop_close()
//
hfsmp->hfs_freezing_proc = NULL;
- hfs_global_exclusive_lock_release(hfsmp);
+ hfs_unlock_global (hfsmp);
lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
return (0);
return do_bulk_access_check(hfsmp, vp, ap, size, context);
}
- case HFS_SETACLSTATE: {
- int state;
-
- if (ap->a_data == NULL) {
- return (EINVAL);
- }
-
- vfsp = vfs_statfs(HFSTOVFS(hfsmp));
- state = *(int *)ap->a_data;
-
- if (hfsmp->hfs_flags & HFS_READ_ONLY) {
- return (EROFS);
- }
- // super-user can enable or disable acl's on a volume.
- // the volume owner can only enable acl's
- if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
- return (EPERM);
- }
- if (state == 0 || state == 1)
- return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
- else
- return (EINVAL);
- }
-
case HFS_SET_XATTREXTENTS_STATE: {
int state;
/* Super-user can enable or disable extent-based extended
* attribute support on a volume
+ * Note: Starting with Mac OS X 10.7, extent-based extended attributes
+ * are enabled by default, so any change made here is transient and
+ * only lasts until the volume is remounted.
*/
if (!is_suser()) {
return (EPERM);
fp = VTOF(vp);
/* Protect against a size change. */
- hfs_lock_truncate(VTOC(vp), TRUE);
+ hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
#if HFS_COMPRESSION
if (compressed && (uncompressed_size == -1)) {
error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
}
- hfs_unlock_truncate(VTOC(vp), TRUE);
+ hfs_unlock_truncate(VTOC(vp), 0);
return (error);
}
case F_READBOOTSTRAP:
case F_WRITEBOOTSTRAP:
- {
- struct vnode *devvp = NULL;
- user_fbootstraptransfer_t *user_bootstrapp;
- int devBlockSize;
- int error;
- uio_t auio;
- daddr64_t blockNumber;
- u_int32_t blockOffset;
- u_int32_t xfersize;
- struct buf *bp;
- user_fbootstraptransfer_t user_bootstrap;
-
- if (!vnode_isvroot(vp))
- return (EINVAL);
- /* LP64 - when caller is a 64 bit process then we are passed a pointer
- * to a user_fbootstraptransfer_t else we get a pointer to a
- * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
- */
- if (hfsmp->hfs_flags & HFS_READ_ONLY) {
- return (EROFS);
- }
- if (is64bit) {
- user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
- }
- else {
- user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
- user_bootstrapp = &user_bootstrap;
- user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
- user_bootstrap.fbt_length = bootstrapp->fbt_length;
- user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
- }
-
- if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
- (user_bootstrapp->fbt_length > 1024)) {
- return EINVAL;
- }
-
- if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
- return EINVAL;
-
- devvp = VTOHFS(vp)->hfs_devvp;
- auio = uio_create(1, user_bootstrapp->fbt_offset,
- is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
- (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
- uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
-
- devBlockSize = vfs_devblocksize(vnode_mount(vp));
-
- while (uio_resid(auio) > 0) {
- blockNumber = uio_offset(auio) / devBlockSize;
- error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
- if (error) {
- if (bp) buf_brelse(bp);
- uio_free(auio);
- return error;
- };
-
- blockOffset = uio_offset(auio) % devBlockSize;
- xfersize = devBlockSize - blockOffset;
- error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
- if (error) {
- buf_brelse(bp);
- uio_free(auio);
- return error;
- };
- if (uio_rw(auio) == UIO_WRITE) {
- error = VNOP_BWRITE(bp);
- if (error) {
- uio_free(auio);
- return error;
- }
- } else {
- buf_brelse(bp);
- };
- };
- uio_free(auio);
- };
- return 0;
+ return 0;
case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
{
break;
}
+ case HFS_DISABLE_METAZONE: {
+ /* Only root can disable metadata zone */
+ if (!is_suser()) {
+ return EACCES;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Disable metadata zone now */
+ (void) hfs_metadatazone_init(hfsmp, true);
+ printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+ break;
+ }
+
default:
return (ENOTTY);
}
{
buf_t bp = ap->a_bp;
vnode_t vp = buf_vnode(bp);
+ int error = 0;
+
+#if CONFIG_PROTECT
+ cnode_t *cp = NULL;
+
+ if ((cp = cp_get_protected_cnode(vp)) != NULL) {
+ /*
+ * Some paths to hfs_vnop_strategy will take the cnode lock,
+ * and some won't. But since content protection is only enabled
+ * for files that (a) aren't system files and (b) are regular
+ * files, any valid cnode here will be unlocked.
+ */
+ hfs_lock(cp, HFS_SHARED_LOCK);
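+ /*
+ * Attach the cnode's protection entry to the buffer so that the layers
+ * below can locate the per-file protection state for this I/O
+ * (assumed purpose of buf_setcpaddr).
+ */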
+ buf_setcpaddr(bp, cp->c_cpentry);
+ }
+#endif /* CONFIG_PROTECT */
+
- return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
+ error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
+#if CONFIG_PROTECT
+ if (cp) {
+ hfs_unlock(cp);
+ }
+#endif
+
+ return error;
}
static int
return 0;
}
-static int
+int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
register struct cnode *cp = VTOC(vp);
lockflags |= SFL_EXTENTS;
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
- (FCB*)fp, length, false));
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
+ FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
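+ /*
+ * The additional TruncateFileC arguments (whether the catalog record is
+ * already gone, whether this is the resource fork, and the file ID) are
+ * what allow the same routine to be reused from hfs_release_storage below,
+ * where the cnode may no longer be valid (meanings inferred from those
+ * call sites).
+ */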
hfs_systemfile_unlock(hfsmp, lockflags);
}
return (retval);
}
+/*
+ * Preparation which must be done prior to deleting the catalog record
+ * of a file or directory. In order to make the on-disk state as safe as possible,
+ * we remove the catalog entry before releasing the bitmap blocks and the
+ * overflow extent records. However, some work must be done prior to deleting
+ * the catalog record.
+ *
+ * When calling this function, the cnode must exist both in memory and on-disk.
+ * If there are both resource fork and data fork vnodes, this function should
+ * be called on both.
+ */
+
+int
+hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
+
+ struct filefork *fp = VTOF(vp);
+ struct cnode *cp = VTOC(vp);
+ int retval = 0;
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+
+ /*
+ * See the comment below in hfs_truncate for why we need to call
+ * setsize here. Essentially we want to avoid pending IO if we
+ * already know that the blocks are going to be released here.
+ * This function is only called when totally removing all storage for a file, so
+ * we can take a shortcut and call ubc_setsize(vp, 0) immediately.
+ */
+ ubc_setsize(vp, 0);
+
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
+
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp))) {
+ return(retval);
+ }
+#endif /* QUOTA */
+
+ /* Wipe out any invalid ranges which have yet to be backed by disk */
+ rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
+
+ /*
+ * Account for any unmapped blocks. Since we're deleting the
+ * entire file, we don't have to worry about just shrinking
+ * to a smaller number of borrowed blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t loanedBlocks;
+
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
+
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Special wrapper around calling TruncateFileC. This function is usable
+ * even when the catalog record does not exist any longer, making it ideal
+ * for use when deleting a file. The simplification here is that we know
+ * that we are releasing all blocks.
+ *
+ * The caller is responsible for saving off a copy of the filefork(s)
+ * embedded within the cnode prior to calling this function. The pointers
+ * supplied as arguments must be valid even if the cnode is no longer valid.
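+ *
+ * A sketch of the intended calling sequence, assumed from the comments
+ * above rather than copied from an actual call site:
+ *
+ *	hfs_prepare_release_storage(hfsmp, vp);	/* settle loaned blocks, invalid ranges */
+ *	/* ... save copies of the fileforks, delete the catalog record ... */
+ *	hfs_release_storage(hfsmp, &datafork, &rsrcfork, fileid);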
+ */
+
+int
+hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
+ struct filefork *rsrcfork, u_int32_t fileid) {
+
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize = 0;
+ int error = 0;
+ int lockflags;
+
+ blksize = hfsmp->blockSize;
+
+ /* Data Fork */
+ if (datafork->ff_blocks > 0) {
+ fileblocks = datafork->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+
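+ /*
+ * Shrink the fork in steps of at most HFS_BIGFILE_SIZE when it has
+ * overflow extents, so that no single transaction has to free an
+ * unbounded amount of space (the assumption being that this keeps
+ * each journal transaction reasonably small).
+ */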
+ while (filebytes > 0) {
+ if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = 0;
+ }
+
+ /* Start a transaction, and wipe out as many blocks as we can in this iteration */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (datafork->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(datafork))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (error == 0) {
+ datafork->ff_size = filebytes;
+ }
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ /* Finish the transaction and start over if necessary */
+ hfs_end_transaction(hfsmp);
+
+ if (error) {
+ break;
+ }
+ }
+ }
+
+ /* Resource fork */
+ if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
+ fileblocks = rsrcfork->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+
+ while (filebytes > 0) {
+ if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = 0;
+ }
+
+ /* Start a transaction, and wipe out as many blocks as we can in this iteration */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (rsrcfork->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(rsrcfork))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (error == 0) {
+ rsrcfork->ff_size = filebytes;
+ }
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ /* Finish the transaction and start over if necessary */
+ hfs_end_transaction(hfsmp);
+
+ if (error) {
+ break;
+ }
+ }
+ }
+
+ return error;
+}
/*
* Truncate a cnode to at most length size, freeing (or adding) the
* disk blocks.
*/
-__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
int skipupdate, vfs_context_t context)
struct hfsmount *hfsmp;
kauth_cred_t cred = vfs_context_ucred(ap->a_context);
int lockflags;
+ time_t orig_ctime;
*(ap->a_bytesallocated) = 0;
cp = VTOC(vp);
- hfs_lock_truncate(cp, TRUE);
+ orig_ctime = VTOC(vp)->c_ctime;
+
+ check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
+
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
goto Err_Exit;
if (retval == 0)
retval = retval2;
Err_Exit:
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
hfs_unlock(cp);
return (retval);
}
};
*/
{
- vnode_t vp = ap->a_vp;
- int error;
+ vnode_t vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ int error = 0;
+ upl_t upl;
+ upl_page_info_t *pl;
+ off_t f_offset;
+ int offset;
+ int isize;
+ int pg_index;
+ boolean_t truncate_lock_held = FALSE;
+ boolean_t file_converted = FALSE;
+ kern_return_t kret;
+
+ vp = ap->a_vp;
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+#if CONFIG_PROTECT
+ if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
+ return error;
+ }
+#endif /* CONFIG_PROTECT */
+
+ if (ap->a_pl != NULL) {
+ /*
+ * this can only happen for swap files now that
+ * we're asking for V2 paging behavior...
+ * so don't need to worry about decompression, or
+ * keeping track of blocks read or taking the truncate lock
+ */
+ error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+ ap->a_size, (off_t)fp->ff_size, ap->a_flags);
+ goto pagein_done;
+ }
+
+retry_pagein:
+ /*
+ * take truncate lock (shared/recursive) to guard against
+ * zero-fill thru fsync interfering, but only for v2
+ *
+ * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
+ * lock shared and we are allowed to recurse 1 level if this thread already
+ * owns the lock exclusively... this can legally occur
+ * if we are doing a shrinking ftruncate against a file
+ * that is mapped private, and the pages being truncated
+ * do not currently exist in the cache... in that case
+ * we will have to page-in the missing pages in order
+ * to provide them to the private mapping... we must
+ * also call hfs_unlock_truncate with a positive been_recursed
+ * arg to indicate that if we have recursed, there is no need to drop
+ * the lock. Allowing this simple recursion is necessary
+ * in order to avoid a certain deadlock... since the ftruncate
+ * already holds the truncate lock exclusively, if we try
+ * to acquire it shared to protect the pagein path, we will
+ * hang this thread
+ *
+ * NOTE: The if () block below is a workaround in order to prevent a
+ * VM deadlock. See rdar://7853471.
+ *
+ * If we are in a forced unmount, then launchd will still have the
+ * dyld_shared_cache file mapped as it is trying to reboot. If we
+ * take the truncate lock here to service a page fault, then our
+ * thread could deadlock with the forced-unmount. The forced unmount
+ * thread will try to reclaim the dyld_shared_cache vnode, but since it's
+ * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
+ * thread will think it needs to copy all of the data out of the file
+ * and into a VM copy object. If we hold the cnode lock here, then that
+ * VM operation will not be able to proceed, because we'll set a busy page
+ * before attempting to grab the lock. Note that this isn't as simple as "don't
+ * call ubc_setsize" because doing that would just shift the problem to the
+ * ubc_msync done before the vnode is reclaimed.
+ *
+ * So, if a forced unmount on this volume is in flight AND the cnode is
+ * marked C_DELETED, then just go ahead and do the page in without taking
+ * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
+ * that is not going to be available on the next mount, this seems like an
+ * OK solution from a correctness point of view, even though it is hacky.
+ */
+ if (vfs_isforce(vp->v_mount)) {
+ if (cp->c_flag & C_DELETED) {
+ /* If we don't get it, then just go ahead and operate without the lock */
+ truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
+ }
+ }
+ else {
+ hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
+ truncate_lock_held = TRUE;
+ }
+
+ kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
+
+ if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+ error = EINVAL;
+ goto pagein_done;
+ }
+ isize = ap->a_size;
+
+ /*
+ * Scan from the back to find the last page in the UPL, so that we
+ * aren't looking at a UPL that may have already been freed by the
+ * preceding aborts/completions.
+ */
+ for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+ if (upl_page_present(pl, --pg_index))
+ break;
+ if (pg_index == 0) {
+ /*
+ * no absent pages were found in the range specified
+ * just abort the UPL to get rid of it and then we're done
+ */
+ ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+ goto pagein_done;
+ }
+ }
+ /*
+ * initialize the offset variables before we touch the UPL.
+ * f_offset is the position into the file, in bytes
+ * offset is the position into the UPL, in bytes
+ * pg_index is the pg# of the UPL we're operating on
+ * isize is the offset into the UPL of the last page that is present.
+ */
+ isize = ((pg_index + 1) * PAGE_SIZE);
+ pg_index = 0;
+ offset = 0;
+ f_offset = ap->a_f_offset;
+
+ while (isize) {
+ int xsize;
+ int num_of_pages;
+
+ if ( !upl_page_present(pl, pg_index)) {
+ /*
+ * we asked for RET_ONLY_ABSENT, so it's possible
+ * to get back empty slots in the UPL.
+ * just skip over them
+ */
+ f_offset += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ isize -= PAGE_SIZE;
+ pg_index++;
+
+ continue;
+ }
+ /*
+ * We know that we have at least one absent page.
+ * Now checking to see how many in a row we have
+ */
+ num_of_pages = 1;
+ xsize = isize - PAGE_SIZE;
+
+ while (xsize) {
+ if ( !upl_page_present(pl, pg_index + num_of_pages))
+ break;
+ num_of_pages++;
+ xsize -= PAGE_SIZE;
+ }
+ xsize = num_of_pages * PAGE_SIZE;
#if HFS_COMPRESSION
- if (VNODE_IS_RSRC(vp)) {
- /* allow pageins of the resource fork */
- } else {
- int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
- if (compressed) {
- error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow pageins of the resource fork */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+
if (compressed) {
- if (error == 0) {
- /* successful page-in, update the access time */
- VTOC(vp)->c_touch_acctime = TRUE;
+ if (truncate_lock_held) {
+ /*
+ * can't hold the truncate lock when calling into the decmpfs layer
+ * since it calls back into this layer... even though we're only
+ * holding the lock in shared mode, and the re-entrant path only
+ * takes the lock shared, we can deadlock if some other thread
+ * tries to grab the lock exclusively in between.
+ */
+ hfs_unlock_truncate(cp, 1);
+ truncate_lock_held = FALSE;
+ }
+ ap->a_pl = upl;
+ ap->a_pl_offset = offset;
+ ap->a_f_offset = f_offset;
+ ap->a_size = xsize;
+
+ error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+ /*
+ * note that decmpfs_pagein_compressed can change the state of
+ * 'compressed'... it will set it to 0 if the file is no longer
+ * compressed once the compression lock is successfully taken
+ * i.e. we would block on that lock while the file is being inflated
+ */
+ if (compressed) {
+ if (error == 0) {
+ /* successful page-in, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
- /* compressed files are not hot file candidates */
- if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
- VTOF(vp)->ff_bytesread = 0;
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+ } else if (error == EAGAIN) {
+ /*
+ * EAGAIN indicates someone else already holds the compression lock...
+ * to avoid deadlocking, we'll abort this range of pages with an
+ * indication that the pagein needs to be redriven
+ */
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
}
+ goto pagein_next_range;
+ }
+ else {
+ /*
+ * Set file_converted only if the file became decompressed while we were
+ * paging in. If it were still compressed, we would re-start the loop using the goto
+ * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
+ * condition below, since we could have avoided taking the truncate lock to prevent
+ * a deadlock in the force unmount case.
+ */
+ file_converted = TRUE;
}
- return error;
}
- /* otherwise the file was converted back to a regular file while we were reading it */
+ if (file_converted == TRUE) {
+ /*
+ * the file was converted back to a regular file after we first saw it as compressed,
+ * so we need to abort the upl, retake the truncate lock, recreate the UPL and start over.
+ * Reset a_size so that we consider what remains of the original request
+ * and null out a_upl and a_pl_offset.
+ *
+ * We should only be able to get into this block if the decmpfs_pagein_compressed
+ * successfully decompressed the range in question for this file.
+ */
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+ ap->a_size = isize;
+ ap->a_pl = NULL;
+ ap->a_pl_offset = 0;
+
+ /* Reset file_converted back to false so that we don't infinite-loop. */
+ file_converted = FALSE;
+ goto retry_pagein;
+ }
}
- }
#endif
- error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
- ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
+ error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
- /*
- * Keep track of blocks read.
- */
- if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
- struct cnode *cp;
- struct filefork *fp;
- int bytesread;
- int took_cnode_lock = 0;
+ /*
+ * Keep track of blocks read.
+ */
+ if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+ int bytesread;
+ int took_cnode_lock = 0;
- cp = VTOC(vp);
- fp = VTOF(vp);
+ if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+ bytesread = fp->ff_size;
+ else
+ bytesread = xsize;
- if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
- bytesread = fp->ff_size;
- else
- bytesread = ap->a_size;
+ /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+ struct timeval tv;
- /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
- if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- took_cnode_lock = 1;
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ cp->c_touch_acctime = TRUE;
+ if (took_cnode_lock)
+ hfs_unlock(cp);
}
- /*
- * If this file hasn't been seen since the start of
- * the current sampling period then start over.
- */
- if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
- struct timeval tv;
+pagein_next_range:
+ f_offset += xsize;
+ offset += xsize;
+ isize -= xsize;
+ pg_index += num_of_pages;
- fp->ff_bytesread = bytesread;
- microtime(&tv);
- cp->c_atime = tv.tv_sec;
- } else {
- fp->ff_bytesread += bytesread;
- }
- cp->c_touch_acctime = TRUE;
- if (took_cnode_lock)
- hfs_unlock(cp);
+ error = 0;
}
+
+pagein_done:
+ if (truncate_lock_held == TRUE) {
+ /* Note: 1 is passed to hfs_unlock_truncate in the been_recursed argument */
+ hfs_unlock_truncate(cp, 1);
+ }
+
return (error);
}
* take truncate lock (shared) to guard against
* zero-fill thru fsync interfering, but only for v2
*/
- hfs_lock_truncate(cp, 0);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK);
if (a_flags & UPL_MSYNC) {
request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
else {
request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
}
+
kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
*
* During step 3 page-ins to the file get suspended.
*/
-__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
struct proc *p)
fp = VTOF(vp);
if (fp->ff_unallocblocks)
return (EINVAL);
+
+#if CONFIG_PROTECT
+ /*
+ * <rdar://problem/9118426>
+ * Disable HFS file relocation on content-protected filesystems
+ */
+ if (cp_fs_protected (hfsmp->hfs_mp)) {
+ return EINVAL;
+ }
+#endif
+
+ /* If it's an SSD, also disable HFS relocation */
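+ /* (no seek penalty on solid state media, so moving the file around is
+ * assumed to buy nothing) */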
+ if (hfsmp->hfs_flags & HFS_SSD) {
+ return EINVAL;
+ }
+
blksize = hfsmp->blockSize;
if (blockHint == 0)
blockHint = hfsmp->nextAllocation;
if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
hfs_unlock(cp);
- hfs_lock_truncate(cp, TRUE);
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
/* Force lock since callers expects lock to be held. */
if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
return (retval);
}
/* No need to continue if file was removed. */
if (cp->c_flag & C_NOEXISTS) {
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
return (ENOENT);
}
took_trunc_lock = 1;
if (hfs_start_transaction(hfsmp) != 0) {
if (took_trunc_lock)
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
return (EINVAL);
}
started_tr = 1;
goto restore;
out:
if (took_trunc_lock)
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
if (lockflags) {
hfs_systemfile_unlock(hfsmp, lockflags);
restore:
if (fp->ff_blocks == headblks) {
if (took_trunc_lock)
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
goto exit;
}
/*
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
}
- (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
+ (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
+ FTOC(fp)->c_fileid, false);
hfs_systemfile_unlock(hfsmp, lockflags);
lockflags = 0;
if (took_trunc_lock)
- hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock_truncate(cp, 0);
goto exit;
}
iosize = bufsize = MIN(copysize, 128 * 1024);
offset = 0;
+ hfs_unlock(VTOC(vp));
+
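+	/*
+	 * The cnode lock is dropped across the protection check and the buffer
+	 * allocation, but callers expect it to be held on return, so every
+	 * early-error path below re-takes it before returning.
+	 */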
+#if CONFIG_PROTECT
+ if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ return (error);
+ }
+#endif /* CONFIG_PROTECT */
+
if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
return (ENOMEM);
- }
- hfs_unlock(VTOC(vp));
+ }
auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);