X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/39236c6e673c41db228275375ab7fdb0f837b292..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/bsd/hfs/hfs_readwrite.c

diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index a3f653fc4..f09bdc7d2 100644
--- a/bsd/hfs/hfs_readwrite.c
+++ b/bsd/hfs/hfs_readwrite.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -54,6 +54,7 @@
 #include <sys/sysctl.h>
 #include <sys/fsctl.h>
 #include <sys/mount_internal.h>
+#include <sys/file_internal.h>
 
 #include <miscfs/specfs/specdev.h>
 
@@ -125,6 +126,7 @@ hfs_vnop_read(struct vnop_read_args *ap)
 	int retval = 0;
 	int took_truncate_lock = 0;
 	int io_throttle = 0;
+	int throttled_count = 0;
 
 	/* Preflight checks */
 	if (!vnode_isreg(vp)) {
@@ -138,8 +140,14 @@ hfs_vnop_read(struct vnop_read_args *ap)
 		return (0);		/* Nothing left to do */
 	if (offset < 0)
 		return (EINVAL);	/* cant read from a negative offset */
-	
-	
+
+	if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+						(IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+		/* Don't allow unencrypted io request from user space */
+		return EPERM;
+	}
+
+
 
 #if HFS_COMPRESSION
 	if (VNODE_IS_RSRC(vp)) {
@@ -204,6 +212,13 @@ read_again:
 
 	filesize = fp->ff_size;
 	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+
+	/*
+	 * Check the file size. Note that per POSIX spec, we return 0 at 
+	 * file EOF, so attempting a read at an offset that is too big
+	 * should just return 0 on HFS+. Since the return value was initialized
+	 * to 0 above, we just jump to exit.  HFS Standard has its own behavior.
+	 */
 	if (offset > filesize) {
 		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
 		    (offset > (off_t)MAXHFSFILESIZE)) {
@@ -212,14 +227,14 @@ read_again:
 		goto exit;
 	}
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+	KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
 		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
 
 	retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
 
 	cp->c_touch_acctime = TRUE;
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+	KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
 		(int)uio_offset(uio), uio_resid(uio), (int)filesize,  (int)filebytes, 0);
 
 	/*
@@ -258,10 +273,14 @@ exit:
 	}
 	if (retval == EAGAIN) {
 		throttle_lowpri_io(1);
+		throttled_count++;
 
 		retval = 0;
 		goto read_again;
 	}
+	if (throttled_count) {
+		throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
+	}
 	return (retval);
 }
 
@@ -294,6 +313,7 @@ hfs_vnop_write(struct vnop_write_args *ap)
 	time_t orig_ctime=VTOC(vp)->c_ctime;
 	int took_truncate_lock = 0;
 	int io_return_on_throttle = 0;
+	int throttled_count = 0;
 	struct rl_entry *invalid_range;
 
 #if HFS_COMPRESSION
@@ -327,6 +347,13 @@ hfs_vnop_write(struct vnop_write_args *ap)
 
 #endif
 
+	if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+						(IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+		/* Don't allow unencrypted io request from user space */
+		return EPERM;
+	}
+
+
 	resid = uio_resid(uio);
 	offset = uio_offset(uio);
 
@@ -367,7 +394,6 @@ hfs_vnop_write(struct vnop_write_args *ap)
 	}
 
 again:
-	/* Protect against a size change. */
 	/*
 	 * Protect against a size change.
 	 *
@@ -446,10 +472,6 @@ again:
 	}
 	cnode_locked = 1;
 	
-	if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
-		hfs_incr_gencount (cp);
-	}
-
 	/*
 	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
 	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
@@ -469,7 +491,7 @@ again:
 		goto again;
 	}
 	
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
 		     (int)offset, uio_resid(uio), (int)fp->ff_size,
 		     (int)filebytes, 0);
 
@@ -518,7 +540,7 @@ again:
 		if (retval != E_NONE)
 			break;
 		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
-		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+		KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
 			(int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
 	}
 	(void) hfs_update(vp, TRUE);
@@ -708,6 +730,7 @@ sizeok:
 
 					cp->c_touch_chgtime = TRUE;
 					cp->c_touch_modtime = TRUE;
+					hfs_incr_gencount(cp);
 				}
 				if (filesize > fp->ff_size) {
 					/*
@@ -737,13 +760,7 @@ sizeok:
 				fp->ff_bytesread = 0;
 			}
 		}
-		fp->ff_new_size = 0;	/* ff_size now has the correct size */
-		
-		/* If we wrote some bytes, then touch the change and mod times */
-		if (resid > uio_resid(uio)) {
-			cp->c_touch_chgtime = TRUE;
-			cp->c_touch_modtime = TRUE;
-		}
+		fp->ff_new_size = 0;	/* ff_size now has the correct size */		
 	}
 	if (partialwrite) {
 		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
@@ -758,44 +775,43 @@ sizeok:
 	}
 
 ioerr_exit:
-	/*
-	 * If we successfully wrote any data, and we are not the superuser
-	 * we clear the setuid and setgid bits as a precaution against
-	 * tampering.
-	 */
-	if (cp->c_mode & (S_ISUID | S_ISGID)) {
-		cred = vfs_context_ucred(ap->a_context);
-		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
-			if (!cnode_locked) {
-				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-				cnode_locked = 1;
+	if (resid > uio_resid(uio)) {
+		if (!cnode_locked) {
+			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+			cnode_locked = 1;
+		}
+
+		cp->c_touch_chgtime = TRUE;
+		cp->c_touch_modtime = TRUE;
+		hfs_incr_gencount(cp);
+
+		/*
+		 * If we successfully wrote any data, and we are not the superuser
+		 * we clear the setuid and setgid bits as a precaution against
+		 * tampering.
+		 */
+		if (cp->c_mode & (S_ISUID | S_ISGID)) {
+			cred = vfs_context_ucred(ap->a_context);
+			if (cred && suser(cred, NULL)) {
+				cp->c_mode &= ~(S_ISUID | S_ISGID);
 			}
-			cp->c_mode &= ~(S_ISUID | S_ISGID);
 		}
 	}
 	if (retval) {
 		if (ioflag & IO_UNIT) {
-			if (!cnode_locked) {
-				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-				cnode_locked = 1;
-			}
 			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
-			                   0, 0, ap->a_context);
+			                   0, ap->a_context);
 			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
 			uio_setresid(uio, resid);
 			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 		}
-	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
-		if (!cnode_locked) {
-			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-			cnode_locked = 1;
-		}
+	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
 		retval = hfs_update(vp, TRUE);
-	}
+
 	/* Updating vcbWrCnt doesn't need to be atomic. */
 	hfsmp->vcbWrCnt++;
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
 		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 exit:
 	if (cnode_locked)
@@ -806,10 +822,14 @@ exit:
 	}
 	if (retval == EAGAIN) {
 		throttle_lowpri_io(1);
+		throttled_count++;
 
 		retval = 0;
 		goto again;
 	}
+	if (throttled_count) {
+		throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
+	}
 	return (retval);
 }
 
@@ -1031,15 +1051,15 @@ struct cinfo {
 };
 
 static int
-snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
+snoop_callback(const cnode_t *cp, void *arg)
 {
-    struct cinfo *cip = (struct cinfo *)arg;
+    struct cinfo *cip = arg;
 
-    cip->uid = attrp->ca_uid;
-    cip->gid = attrp->ca_gid;
-    cip->mode = attrp->ca_mode;
-    cip->parentcnid = descp->cd_parentcnid;
-    cip->recflags = attrp->ca_recflags;
+    cip->uid = cp->c_uid;
+    cip->gid = cp->c_gid;
+    cip->mode = cp->c_mode;
+    cip->parentcnid = cp->c_parentcnid;
+    cip->recflags = cp->c_attr.ca_recflags;
 	
     return (0);
 }
@@ -1056,36 +1076,41 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
 
     /* if this id matches the one the fsctl was called with, skip the lookup */
     if (cnid == skip_cp->c_cnid) {
-	cnattrp->ca_uid = skip_cp->c_uid;
-	cnattrp->ca_gid = skip_cp->c_gid;
-	cnattrp->ca_mode = skip_cp->c_mode;
-	cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
-	keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
+		cnattrp->ca_uid = skip_cp->c_uid;
+		cnattrp->ca_gid = skip_cp->c_gid;
+		cnattrp->ca_mode = skip_cp->c_mode;
+		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
+		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
     } else {
-	struct cinfo c_info;
-
-	/* otherwise, check the cnode hash incase the file/dir is incore */
-	if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
-	    cnattrp->ca_uid = c_info.uid;
-	    cnattrp->ca_gid = c_info.gid;
-	    cnattrp->ca_mode = c_info.mode;
-	    cnattrp->ca_recflags = c_info.recflags;
-	    keyp->hfsPlus.parentID = c_info.parentcnid;
-	} else {
-	    int lockflags;
-			
-	    if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
-		    throttle_lowpri_io(1);
+		struct cinfo c_info;
+
+		/* otherwise, check the cnode hash in case the file/dir is incore */
+		error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
+
+		if (error == EACCES) {
+			// File is deleted
+			return ENOENT;
+		} else if (!error) {
+			cnattrp->ca_uid = c_info.uid;
+			cnattrp->ca_gid = c_info.gid;
+			cnattrp->ca_mode = c_info.mode;
+			cnattrp->ca_recflags = c_info.recflags;
+			keyp->hfsPlus.parentID = c_info.parentcnid;
+		} else {
+			int lockflags;
+
+			if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
+				throttle_lowpri_io(1);
 
-	    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 
-	    /* lookup this cnid in the catalog */
-	    error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
+			/* lookup this cnid in the catalog */
+			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
 			
-	    hfs_systemfile_unlock(hfsmp, lockflags);
+			hfs_systemfile_unlock(hfsmp, lockflags);
 			
-	    cache->lookups++;
-	}
+			cache->lookups++;
+		}
     }
 	
     return (error);
@@ -1547,24 +1572,13 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
 /* end "bulk-access" support */
 
 
-/*
- * Callback for use with freeze ioctl.
- */
-static int
-hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
-{
-	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
-
-	return 0;
-}
-
 /*
  * Control filesystem operating characteristics.
  */
 int
 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 		vnode_t a_vp;
-		int  a_command;
+		long  a_command;
 		caddr_t  a_data;
 		int  a_fflag;
 		vfs_context_t a_context;
@@ -1654,64 +1668,133 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 		return (error);
 	}
 
-	case HFS_GET_WRITE_GEN_COUNTER:
+	case HFS_TRANSFER_DOCUMENT_ID:
 	{
 		struct cnode *cp = NULL;
 		int error;
-		u_int32_t *counter = (u_int32_t *)ap->a_data;
+		u_int32_t to_fd = *(u_int32_t *)ap->a_data;
+		struct fileproc *to_fp;
+		struct vnode *to_vp;
+		struct cnode *to_cp;
 
 		cp = VTOC(vp);
 
-		if (vnode_isdir (vp)) {
-			error = EISDIR;
-			*counter = 0;
+		if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
+			//printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
 			return error;
 		}
-		
-		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
-		if (error == 0) {
-			struct ubc_info *uip;
-			int is_mapped = 0;
+		if ( (error = vnode_getwithref(to_vp)) ) {
+			file_drop(to_fd);
+			return error;
+		}
+
+		if (VTOHFS(to_vp) != hfsmp) {
+			error = EXDEV;
+			goto transfer_cleanup;
+		}
+
+		int need_unlock = 1;
+		to_cp = VTOC(to_vp);
+		error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+		if (error != 0) {
+			//printf("could not lock the pair of cnodes (error %d)\n", error);
+			goto transfer_cleanup;
+		}
 			
-			if (UBCINFOEXISTS(vp)) {
-				uip = vp->v_ubcinfo;
-				if (uip->ui_flags & UI_ISMAPPED) {
-					is_mapped = 1;
+		if (!(cp->c_bsdflags & UF_TRACKED)) {
+			error = EINVAL;
+		} else if (to_cp->c_bsdflags & UF_TRACKED) {
+			//
+			// if the destination is already tracked, return an error
+			// as otherwise it's a silent deletion of the target's
+			// document-id
+			//
+			error = EEXIST;
+		} else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
+			//
+			// we can use the FndrExtendedFileInfo because the doc-id is the first
+			// thing in both it and the ExtendedDirInfo struct which is fixed in
+			// format and can not change layout
+			//
+			struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
+			struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
+
+			if (f_extinfo->document_id == 0) {
+				uint32_t new_id;
+
+				hfs_unlockpair(cp, to_cp);  // have to unlock to be able to get a new-id
+				
+				if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
+					//
+					// re-lock the pair now that we have the document-id
+					//
+					hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+					f_extinfo->document_id = new_id;
+				} else {
+					goto transfer_cleanup;
 				}
 			}
+					
+			to_extinfo->document_id = f_extinfo->document_id;
+			f_extinfo->document_id = 0;
+			//printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
 
+			// make sure the destination is also UF_TRACKED
+			to_cp->c_bsdflags |= UF_TRACKED;
+			cp->c_bsdflags &= ~UF_TRACKED;
 
-			if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
-				uint32_t gcount = hfs_get_gencount(cp);
-				//
-				// Even though we return EBUSY for files that are mmap'ed
-				// we also want to bump the value so that the write-gen
-				// counter will always be different once the file is unmapped
-				// (since the file may be unmapped but the pageouts have not
-				// yet happened).
-				//
-				if (is_mapped) {
-					hfs_incr_gencount (cp);
-					gcount = hfs_get_gencount(cp);
-				}
-				
-				*counter = gcount;
+			// mark the cnodes dirty
+			cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+			to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
 
-			} 
-			else {
-				/* not a file or dir? silently return */
-				*counter = 0;
+			int lockflags;
+			if ((error = hfs_start_transaction(hfsmp)) == 0) {
+
+				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+				(void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
+				(void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
+
+				hfs_systemfile_unlock (hfsmp, lockflags);
+				(void) hfs_end_transaction(hfsmp);
 			}
-			hfs_unlock (cp);
 
-			if (is_mapped) {
-				error = EBUSY;
+#if CONFIG_FSE
+			add_fsevent(FSE_DOCID_CHANGED, context,
+				    FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
+				    FSE_ARG_INO,   (ino64_t)cp->c_fileid,       // src inode #
+				    FSE_ARG_INO,   (ino64_t)to_cp->c_fileid,    // dst inode #
+				    FSE_ARG_INT32, to_extinfo->document_id,
+				    FSE_ARG_DONE);
+
+			hfs_unlockpair(cp, to_cp);    // unlock this so we can send the fsevents
+			need_unlock = 0;
+
+			if (need_fsevent(FSE_STAT_CHANGED, vp)) {
+				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+			}
+			if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
+				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
 			}
+#else
+			hfs_unlockpair(cp, to_cp);    // unlock this so we can send the fsevents
+			need_unlock = 0;
+#endif
+		}
+		
+		if (need_unlock) {
+			hfs_unlockpair(cp, to_cp);
 		}
 
+	transfer_cleanup:
+		vnode_put(to_vp);
+		file_drop(to_fd);
+
 		return error;
 	}
 
+
+
 	case HFS_PREV_LINK:
 	case HFS_NEXT_LINK:
 	{
@@ -1883,20 +1966,11 @@ fail_change_next_allocation:
 		vnode_ref(bsfs_rootvp);
 		vnode_put(bsfs_rootvp);
 
+		hfs_lock_mount(hfsmp);
 		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
-
 		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
-		/* The free extent cache is managed differently for sparse devices.  
-		 * There is a window between which the volume is mounted and the 
-		 * device is marked as sparse, so the free extent cache for this 
-		 * volume is currently initialized as normal volume (sorted by block 
-		 * count).  Reset the cache so that it will be rebuilt again 
-		 * for sparse device (sorted by start block).
-		 */
-		ResetVCBFreeExtCache(hfsmp);
-
-		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
-		hfsmp->hfs_sparsebandblks *= 4;
+		hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
+		hfs_unlock_mount(hfsmp);
 
 		/* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
 
@@ -1919,6 +1993,15 @@ fail_change_next_allocation:
 			}
 		}
 				
+		/* The free extent cache is managed differently for sparse devices.
+		 * There is a window between when the volume is mounted and when
+		 * the device is marked as sparse, so the free extent cache for
+		 * this volume is currently initialized as for a normal volume
+		 * (sorted by block count).  Reset the cache so that it will be
+		 * rebuilt for a sparse device (sorted by start block).
+		 */
+		ResetVCBFreeExtCache(hfsmp);
+
 		(void)vnode_put(di_vp);
 		file_drop(bsdata->backingfd);
 		return (0);
@@ -1938,10 +2021,13 @@ fail_change_next_allocation:
 		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
 		    hfsmp->hfs_backingfs_rootvp) {
 
+			hfs_lock_mount(hfsmp);
 			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
 			tmpvp = hfsmp->hfs_backingfs_rootvp;
 			hfsmp->hfs_backingfs_rootvp = NULLVP;
 			hfsmp->hfs_sparsebandblks = 0;
+			hfs_unlock_mount(hfsmp);
+
 			vnode_rele(tmpvp);
 		}
 		return (0);
@@ -2007,38 +2093,7 @@ fail_change_next_allocation:
 			!kauth_cred_issuser(cred))
 			return (EACCES);
 
-		lck_rw_lock_exclusive(&hfsmp->hfs_insync);
- 
-		// flush things before we get started to try and prevent
-		// dirty data from being paged out while we're frozen.
-		// note: can't do this after taking the lock as it will
-		// deadlock against ourselves.
-		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
-		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
-
-		// DO NOT call hfs_journal_flush() because that takes a
-		// shared lock on the global exclusive lock!
-		journal_flush(hfsmp->jnl, TRUE);
-
-		// don't need to iterate on all vnodes, we just need to
-		// wait for writes to the system files and the device vnode
-		//
-		// Now that journal flush waits for all metadata blocks to 
-		// be written out, waiting for btree writes is probably no
-		// longer required.
-		if (HFSTOVCB(hfsmp)->extentsRefNum)
-		    vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
-		if (HFSTOVCB(hfsmp)->catalogRefNum)
-		    vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
-		if (HFSTOVCB(hfsmp)->allocationsRefNum)
-		    vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
-		if (hfsmp->hfs_attribute_vp)
-		    vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
-		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
-
-		hfsmp->hfs_freezing_proc = current_proc();
-
-		return (0);
+		return hfs_freeze(hfsmp);
 	}
 
 	case F_THAW_FS: {
@@ -2047,20 +2102,7 @@ fail_change_next_allocation:
 			!kauth_cred_issuser(cred))
 			return (EACCES);
 
-		// if we're not the one who froze the fs then we
-		// can't thaw it.
-		if (hfsmp->hfs_freezing_proc != current_proc()) {
-		    return EPERM;
-		}
-
-		// NOTE: if you add code here, also go check the
-		//       code that "thaws" the fs in hfs_vnop_close()
-		//
-		hfsmp->hfs_freezing_proc = NULL;
-		hfs_unlock_global (hfsmp);
-		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
-
-		return (0);
+		return hfs_thaw(hfsmp, current_proc());
 	}
 
 	case HFS_BULKACCESS_FSCTL: {
@@ -2199,6 +2241,52 @@ fail_change_next_allocation:
 		return error;
 	}
 
+	case F_SETIOTYPE: {
+		int error;
+		uint32_t iotypeflag = 0;
+		
+		struct cnode *cp = NULL;
+		/* 
+		 * lock the cnode, decorate the cnode flag, and bail out.
+		 * VFS should have already authenticated the caller for us.
+		 */
+
+		if (ap->a_data == NULL) {
+			return EINVAL;
+		}
+
+		/* 
+		 * Note that even though ap->a_data is of type caddr_t, we
+		 * can only use 32 bits of flag values.
+		 */
+		iotypeflag = (uint32_t) ap->a_data;
+		switch (iotypeflag) {
+			case F_IOTYPE_ISOCHRONOUS:
+				break;
+			default:
+				return EINVAL;
+		}
+
+
+		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+			return EROFS;
+		}
+		cp = VTOC(vp);
+
+		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+		if (error == 0) {
+			switch (iotypeflag) {
+				case F_IOTYPE_ISOCHRONOUS:
+					cp->c_flag |= C_IO_ISOCHRONOUS;
+					break;
+				default:
+					break;
+			}
+			hfs_unlock (cp);
+		}
+		return error;
+	}
+
 	case F_MAKECOMPRESSED: {
 		int error = 0;
 		uint32_t gen_counter;
@@ -2235,7 +2323,7 @@ fail_change_next_allocation:
 			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 			return error;
 		}
-	
+
 		/* Are there any other usecounts/FDs? */
 		if (vnode_isinuse(vp, 1)) {
 			hfs_unlock(cp);
@@ -2243,7 +2331,6 @@ fail_change_next_allocation:
 			return EBUSY;
 		}
 
-
 		/* now we have the cnode locked down; Validate arguments */
 		if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
 			/* EINVAL if you are trying to manipulate an IMMUTABLE file */
@@ -2259,8 +2346,9 @@ fail_change_next_allocation:
 			 */
 			reset_decmp = 1;
 			cp->c_bsdflags |= UF_COMPRESSED;				
-			
-			error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);	
+
+			error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
+								 ap->a_context);
 		}
 		else {
 			error = ESTALE;
@@ -2503,7 +2591,7 @@ fail_change_next_allocation:
 			return (EROFS);
 		}
 		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
-		hfs_mark_volume_inconsistent(hfsmp);
+		hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
 		break;
 
 	case HFS_FSCTL_GET_JOURNAL_INFO:
@@ -2549,7 +2637,144 @@ fail_change_next_allocation:
 		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
 		break;
 	}
-	
+
+
+	case HFS_FSINFO_METADATA_BLOCKS: {
+		int error;
+		struct hfsinfo_metadata *hinfo;
+
+		hinfo = (struct hfsinfo_metadata *)ap->a_data;
+
+		/* Get information about number of metadata blocks */
+		error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
+		if (error) {
+			return error;
+		}
+
+		break;
+	}
+
+	case HFS_GET_FSINFO: {
+		hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
+
+		/* Only root is allowed to get fsinfo */
+		if (!kauth_cred_issuser(kauth_cred_get())) {
+			return EACCES;
+		}
+
+		/*
+		 * Make sure that the caller's version number matches
+		 * the kernel's version number.  This ensures that
+		 * if the structures being read/written are changed
+		 * by the kernel, the caller will not read incorrect data.
+		 *
+		 * The first three fields --- request_type, version and
+		 * flags --- are the same for all the hfs_fsinfo structures,
+		 * so we can access the version number through any of
+		 * them for now.
+		 */
+		if (fsinfo->header.version != HFS_FSINFO_VERSION) {
+			return ENOTSUP;
+		}
+
+		/* Make sure that the current file system is not marked inconsistent */
+		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
+			return EIO;
+		}
+
+		return hfs_get_fsinfo(hfsmp, ap->a_data);
+	}
+
+	case HFS_CS_FREESPACE_TRIM: {
+		int error = 0;
+		int lockflags = 0;
+
+		/* Only root allowed */
+		if (!kauth_cred_issuser(kauth_cred_get())) {
+			return EACCES;
+		}
+
+		/*
+		 * This core functionality is similar to hfs_scan_blocks().
+		 * The main difference is that hfs_scan_blocks() is called
+		 * as part of mount, where we are assured that the journal is
+		 * empty to start with.  This fcntl() can be called on a
+		 * mounted volume, so it has to flush the contents of the
+		 * journal and make sure the summary table is initialized.
+		 *
+		 * This fcntl scans the entire allocation bitmap, creates
+		 * a list of all the free blocks, and issues TRIMs down
+		 * to the underlying device.  This can take a long time,
+		 * as it can generate up to 512MB of read I/O.
+		 */
+
+		if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
+			error = hfs_init_summary(hfsmp);
+			if (error) {
+				printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
+				return error;
+			}
+		}
+
+		/*
+		 * The journal maintains a list of recently deallocated blocks so
+		 * it can issue DKIOCUNMAPs when the corresponding journal
+		 * transaction is flushed to disk.  To avoid any race conditions, we
+		 * want only one active trim list and only one thread issuing DKIOCUNMAPs.
+		 * Therefore we make sure that the journal trim list is sync'ed,
+		 * empty, and not modifiable for the duration of our scan.
+		 *
+		 * Take the journal lock before flushing the journal to the disk.
+		 * We keep holding the journal lock until we have acquired the
+		 * bitmap lock, so that no new journal transactions can start.
+		 * This ensures that the journal trim list is not modified
+		 * between the journal flush and taking the bitmap lock.
+		 * We can release the journal lock once we hold the bitmap
+		 * lock, as it will prevent any further block deallocations.
+		 */
+		hfs_journal_lock(hfsmp);
+
+		/* Flush the journal and wait for all I/Os to finish up */
+		error = hfs_journal_flush(hfsmp, TRUE);
+		if (error) {
+			hfs_journal_unlock(hfsmp);
+			return error;
+		}
+
+		/* Take bitmap lock to ensure it is not being modified */
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+		/* Release the journal lock */
+		hfs_journal_unlock(hfsmp);
+
+		/*
+		 * ScanUnmapBlocks() reads the bitmap in large blocks
+		 * (up to 1MB), unlike the runtime, which reads the bitmap
+		 * in 4K blocks.  This can cause buf_t collisions
+		 * and potential data corruption.  To avoid this, we
+		 * invalidate all the existing buffers associated with
+		 * the bitmap vnode before scanning it.
+		 *
+		 * Note: ScanUnmapBlocks() cleans up all the buffers
+		 * after itself, so there won't be any large buffers left
+		 * for us to clean up after it returns.
+		 */
+		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+		if (error) {
+			hfs_systemfile_unlock(hfsmp, lockflags);
+			return error;
+		}
+
+		/* Traverse bitmap and issue DKIOCUNMAPs */
+		error = ScanUnmapBlocks(hfsmp);
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (error) {
+			return error;
+		}
+
+		break;
+	}
+
 	default:
 		return (ENOTTY);
 	}
@@ -2982,13 +3207,19 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
 	
 	/* Mark buffer as containing static data if cnode flag set */
 	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
-		bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
+		bufattr_markgreedymode(&bp->b_attr);
+	}
+
+	/* Mark buffer as carrying isochronous I/O if the cnode flag is set */
+	if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
+		bufattr_markisochronous(&bp->b_attr);
 	}
 	
 #if CONFIG_PROTECT
 	cnode_t *cp = NULL; 
 	
-	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
+	if ((!bufattr_rawencrypted(&bp->b_attr)) && 
+			((cp = cp_get_protected_cnode(vp)) != NULL)) {
 		/* 
 		 * We rely upon the truncate lock to protect the
 		 * CP cache key from getting tossed prior to our IO finishing here.
@@ -3011,8 +3242,31 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
 		 * with the CP blob being wiped out in the middle of the IO 
 		 * because there isn't anything to toss; the VM swapfile key stays
 		 * in-core as long as the file is open. 
-		 * 
-		 * NB:
+		 */
+		
+		
+		/*
+		 * Last chance: If this data protected I/O does not have unwrapped keys
+		 * present, then try to get them.  We already know that it should, by this point.
+		 */
+		if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) {
+			int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS);
+			if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) {
+				/*
+				 * We have to be careful here.  By this point in the I/O path, VM or the cluster
+				 * engine has prepared a buf_t with the proper file offsets and all the rest,
+				 * so simply erroring out will result in us leaking this particular buf_t.
+				 * We need to properly decorate the buf_t just as buf_strategy would so as 
+				 * to make it appear that the I/O errored out with the particular error code.
+				 */
+				buf_seterror (bp, error);
+				buf_biodone(bp);
+				return error;
+			}
+		}
+		
+		/*
+		 * NB:
 		 * For filesystem resize, we may not have access to the underlying
 		 * file's cache key for whatever reason (device may be locked).  However,
 		 * we do not need it since we are going to use the temporary HFS-wide resize key
@@ -3047,7 +3301,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 {
 	register struct cnode *cp = VTOC(vp);
     	struct filefork *fp = VTOF(vp);
-	struct proc *p = vfs_context_proc(context);;
 	kauth_cred_t cred = vfs_context_ucred(context);
 	int retval;
 	off_t bytesToAdd;
@@ -3059,12 +3312,12 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 	int lockflags;
 	int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
 	int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
-	
+
 	blksize = VTOVCB(vp)->blockSize;
 	fileblocks = fp->ff_blocks;
 	filebytes = (off_t)fileblocks * (off_t)blksize;
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+	KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
 		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
 
 	if (length < 0)
@@ -3118,8 +3371,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 			/* All or nothing and don't round up to clumpsize. */
 			eflags = kEFAllMask | kEFNoClumpMask;
 
-			if (cred && suser(cred, NULL) != 0)
+			if (cred && (suser(cred, NULL) != 0)) {
 				eflags |= kEFReserveMask;  /* keep a reserve */
+			}
 
 			/*
 			 * Allocate Journal and Quota files in metadata zone.
@@ -3141,6 +3395,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 				lockflags |= SFL_EXTENTS;
 			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 
+			/* 
+			 * Keep growing the file as long as the current EOF is
+			 * less than the desired value.
+			 */
 			while ((length > filebytes) && (retval == E_NONE)) {
 				bytesToAdd = length - filebytes;
 				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
@@ -3175,11 +3433,15 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 			if (retval)
 				goto Err_Exit;
 
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+			KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
 				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
 		}
  
-		if (!(flags & IO_NOZEROFILL)) {
+		if (ISSET(flags, IO_NOZEROFILL)) {
+			// An optimisation for the hibernation file
+			if (vnode_isswap(vp))
+				rl_remove_all(&fp->ff_invalidranges);
+		} else {
 			if (UBCINFOEXISTS(vp)  && (vnode_issystem(vp) == 0) && retval == E_NONE) {
 				struct rl_entry *invalid_range;
 				off_t zero_limit;
@@ -3234,7 +3496,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 
 	} else { /* Shorten the size of the file */
 
-		if ((off_t)fp->ff_size > length) {
+		// An optimisation for the hibernation file
+		if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
+			rl_remove_all(&fp->ff_invalidranges);
+		} else if ((off_t)fp->ff_size > length) {
 			/* Any space previously marked as invalid is now irrelevant: */
 			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
 		}
@@ -3268,55 +3533,48 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 			hfs_unlock_mount (hfsmp);
 		}
 
-		/*
-		 * For a TBE process the deallocation of the file blocks is
-		 * delayed until the file is closed.  And hfs_close calls
-		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
-		 * isn't set, we make sure this isn't a TBE process.
-		 */
-		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
 #if QUOTA
-		  off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+		off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
 #endif /* QUOTA */
-		  if (hfs_start_transaction(hfsmp) != 0) {
-		      retval = EINVAL;
-		      goto Err_Exit;
-		  }
+		if (hfs_start_transaction(hfsmp) != 0) {
+			retval = EINVAL;
+			goto Err_Exit;
+		}
 
-			if (fp->ff_unallocblocks == 0) {
-				/* Protect extents b-tree and allocation bitmap */
-				lockflags = SFL_BITMAP;
-				if (overflow_extents(fp))
-					lockflags |= SFL_EXTENTS;
-				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+		if (fp->ff_unallocblocks == 0) {
+			/* Protect extents b-tree and allocation bitmap */
+			lockflags = SFL_BITMAP;
+			if (overflow_extents(fp))
+				lockflags |= SFL_EXTENTS;
+			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 
-				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, 
-													 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
+			retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, 
+												 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
 
-				hfs_systemfile_unlock(hfsmp, lockflags);
+			hfs_systemfile_unlock(hfsmp, lockflags);
+		}
+		if (hfsmp->jnl) {
+			if (retval == 0) {
+				fp->ff_size = length;
 			}
-			if (hfsmp->jnl) {
-				if (retval == 0) {
-					fp->ff_size = length;
-				}
-				if (skipupdate) {
-					(void) hfs_minorupdate(vp);
-				}
-				else {
-					(void) hfs_update(vp, TRUE);
-					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
-				}
+			if (skipupdate) {
+				(void) hfs_minorupdate(vp);
 			}
-			hfs_end_transaction(hfsmp);
+			else {
+				(void) hfs_update(vp, TRUE);
+				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+			}
+		}
+		hfs_end_transaction(hfsmp);
 
-			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
-			if (retval)
-				goto Err_Exit;
+		filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+		if (retval)
+			goto Err_Exit;
 #if QUOTA
-			/* These are bytesreleased */
-			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+		/* These are bytes released */
+		(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
 #endif /* QUOTA */
-		}
+
 		/* 
 		 * Only set update flag if the logical length changes & we aren't
 		 * suppressing modtime updates.
@@ -3352,13 +3610,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
 		retval = hfs_update(vp, MNT_WAIT);
 	}
 	if (retval) {
-		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+		KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
 		     -1, -1, -1, retval, 0);
 	}
 
 Err_Exit:
 
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+	KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
 		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
 
 	return (retval);
@@ -3469,14 +3727,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
 	blksize = hfsmp->blockSize;
 	
 	/* Data Fork */
-	if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
+	if (datafork) {
+		datafork->ff_size = 0;
+
 		fileblocks = datafork->ff_blocks;
 		filebytes = (off_t)fileblocks * (off_t)blksize;		
 		
 		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
 		
 		while (filebytes > 0) {
-			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
+			if (filebytes > HFS_BIGFILE_SIZE) {
 				filebytes -= HFS_BIGFILE_SIZE;
 			} else {
 				filebytes = 0;
@@ -3499,9 +3759,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
 				
 				hfs_systemfile_unlock(hfsmp, lockflags);
 			}
-			if (error == 0) {
-				datafork->ff_size = filebytes;
-			}
 			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
 			
 			/* Finish the transaction and start over if necessary */
@@ -3514,14 +3771,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
 	}
 	
 	/* Resource fork */
-	if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
+	if (error == 0 && rsrcfork) {
+		rsrcfork->ff_size = 0;
+
 		fileblocks = rsrcfork->ff_blocks;
 		filebytes = (off_t)fileblocks * (off_t)blksize;
 		
 		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
 		
 		while (filebytes > 0) {
-			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
+			if (filebytes > HFS_BIGFILE_SIZE) {
 				filebytes -= HFS_BIGFILE_SIZE;
 			} else {
 				filebytes = 0;
@@ -3544,9 +3803,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
 				
 				hfs_systemfile_unlock(hfsmp, lockflags);
 			}
-			if (error == 0) {
-				rsrcfork->ff_size = filebytes;
-			}
 			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
 			
 			/* Finish the transaction and start over if necessary */
@@ -3561,19 +3817,52 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
 	return error;
 }
 
+errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
+{
+	errno_t error;
+
+	/*
+	 * Set the UBC size of vp to len via ubc_setsize_ex, giving the VM
+	 * subsystem a chance to do whatever it needs to with existing pages
+	 * before we delete blocks.  Symlinks don't use the UBC, so ENOENT
+	 * from ubc_setsize_ex is reported to the caller as success (0).
+	 */
+	if (have_cnode_lock) {
+		error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);	/* first attempt: cnode lock stays held */
+		if (error == EAGAIN) {	/* fall back: drop the cnode lock and retry without the flag */
+			cnode_t *cp = VTOC(vp);
+
+			if (cp->c_truncatelockowner != current_thread()) {	/* diagnostic: this thread should own the truncate lock */
+#if DEVELOPMENT || DEBUG
+				panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
+#else
+				printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n");
+#endif
+			}
+
+			hfs_unlock(cp);
+			error = ubc_setsize_ex(vp, len, 0);
+			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);	/* retake the cnode lock so the caller still holds it on return */
+		}
+	} else
+		error = ubc_setsize_ex(vp, len, 0);	/* no cnode lock held: plain call, no retry logic */
+
+	return error == ENOENT ? 0 : error;	/* ENOENT is not treated as a failure; other errors pass through */
+}
 
 /*
  * Truncate a cnode to at most length size, freeing (or adding) the
  * disk blocks.
  */
 int
-hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
-             int truncateflags, vfs_context_t context)
+hfs_truncate(struct vnode *vp, off_t length, int flags,
+			 int truncateflags, vfs_context_t context)
 {
-    	struct filefork *fp = VTOF(vp);
+	struct filefork *fp = VTOF(vp);
 	off_t filebytes;
 	u_int32_t fileblocks;
-	int blksize, error = 0;
+	int blksize;
+	errno_t error = 0;
 	struct cnode *cp = VTOC(vp);
 
 	/* Cannot truncate an HFS directory! */
@@ -3581,7 +3870,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 		return (EISDIR);
 	}
 	/* A swap file cannot change size. */
-	if (vnode_isswap(vp) && (length != 0)) {
+	if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
 		return (EPERM);
 	}
 
@@ -3589,24 +3878,17 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 	fileblocks = fp->ff_blocks;
 	filebytes = (off_t)fileblocks * (off_t)blksize;
 
-	//
-	// Have to do this here so that we don't wind up with
-	// i/o pending for blocks that are about to be released
-	// if we truncate the file.
-	//
-	// If skipsetsize is set, then the caller is responsible
-	// for the ubc_setsize.
-	//
-	// Even if skipsetsize is set, if the length is zero we
-	// want to call ubc_setsize() because as of SnowLeopard
-	// it will no longer cause any page-ins and it will drop
-	// any dirty pages so that we don't do any i/o that we
-	// don't have to.  This also prevents a race where i/o
-	// for truncated blocks may overwrite later data if the
-	// blocks get reallocated to a different file.
-	//
-	if (!skipsetsize || length == 0)
-		ubc_setsize(vp, length);
+	bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
+
+	error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
+	if (error)
+		return error;
+
+	if (!caller_has_cnode_lock) {
+		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+		if (error)
+			return error;
+	}
 
 	// have to loop truncating or growing files that are
 	// really big because otherwise transactions can get
@@ -3614,7 +3896,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 
 	if (length < filebytes) {
 		while (filebytes > length) {
-			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+			if ((filebytes - length) > HFS_BIGFILE_SIZE) {
 		    		filebytes -= HFS_BIGFILE_SIZE;
 			} else {
 		    		filebytes = length;
@@ -3626,7 +3908,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 		}
 	} else if (length > filebytes) {
 		while (filebytes < length) {
-			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+			if ((length - filebytes) > HFS_BIGFILE_SIZE) {
 				filebytes += HFS_BIGFILE_SIZE;
 			} else {
 				filebytes = length;
@@ -3645,9 +3927,16 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 		fp->ff_bytesread = 0;
 	}
 
-	return (error);
-}
+	if (!caller_has_cnode_lock)
+		hfs_unlock(cp);
 
+	// Make sure UBC's size matches up (in case we didn't completely succeed)
+	errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
+	if (!error)
+		error = err2;
+
+	return error;
+}
 
 
 /*
@@ -3785,13 +4074,13 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 		    /* Protect extents b-tree and allocation bitmap */
 		    lockflags = SFL_BITMAP;
 		    if (overflow_extents(fp))
-			lockflags |= SFL_EXTENTS;
+				lockflags |= SFL_EXTENTS;
 		    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 
 		    if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
-			bytesRequested = HFS_BIGFILE_SIZE;
+				bytesRequested = HFS_BIGFILE_SIZE;
 		    } else {
-			bytesRequested = moreBytesRequested;
+				bytesRequested = moreBytesRequested;
 		    }
 
 		    if (extendFlags & kEFContigMask) {
@@ -3848,14 +4137,18 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 
 	} else { /* Shorten the size of the file */
 
-		if (fp->ff_size > length) {
-			/*
-			 * Any buffers that are past the truncation point need to be
-			 * invalidated (to maintain buffer cache consistency).
-			 */
-		}
+		/*
+		 * N.B. At present, this code is never called.  If and when we
+		 * do start using it, it looks like there might be slightly
+		 * strange semantics with the file size: it's possible for the
+		 * file size to *increase* e.g. if current file size is 5,
+		 * length is 1024 and filebytes is 4096, the file size will
+		 * end up being 1024 bytes.  This isn't necessarily a problem
+		 * but it's not consistent with the code above which doesn't
+		 * change the file size.
+		 */
 
-		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
+		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
 		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 
 		/*
@@ -3871,9 +4164,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 		if (fp->ff_size > filebytes) {
 			fp->ff_size = filebytes;
 
-			hfs_unlock(cp);
-			ubc_setsize(vp, fp->ff_size);
-			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+			hfs_ubc_setsize(vp, fp->ff_size, true);
 		}
 	}
 
@@ -3915,8 +4206,10 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap)
 	upl_t 		upl;
 	upl_page_info_t	*pl;
 	off_t		f_offset;
+	off_t		page_needed_f_offset;
 	int		offset;
 	int		isize; 
+	int		upl_size; 
 	int		pg_index;
 	boolean_t	truncate_lock_held = FALSE;
 	boolean_t 	file_converted = FALSE;
@@ -3965,6 +4258,8 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap)
 		goto pagein_done;
 	}
 
+	page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
+
 retry_pagein:
 	/*
 	 * take truncate lock (shared/recursive) to guard against 
@@ -4027,9 +4322,9 @@ retry_pagein:
 	}
 	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
 
-	isize = ap->a_size;
+	upl_size = isize = ap->a_size;
 
-	/* 
+	/*
 	 * Scan from the back to find the last page in the UPL, so that we 
 	 * aren't looking at a UPL that may have already been freed by the
 	 * preceding aborts/completions.
@@ -4097,6 +4392,7 @@ retry_pagein:
 			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
 
 			if (compressed) {
+
 				if (truncate_lock_held) {
 					/*
 					 * can't hold the truncate lock when calling into the decmpfs layer
@@ -4136,6 +4432,19 @@ retry_pagein:
 						 * indication that the pagein needs to be redriven
 						 */
 			        		ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
+					} else if (error == ENOSPC) {
+
+						if (upl_size == PAGE_SIZE)
+							panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
+
+						ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+						ap->a_size = PAGE_SIZE;
+						ap->a_pl = NULL;
+						ap->a_pl_offset = 0;
+						ap->a_f_offset = page_needed_f_offset;
+
+						goto retry_pagein;
 					}
 					goto pagein_next_range;
 				}
@@ -4271,10 +4580,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
 	a_flags = ap->a_flags;
 	a_pl_offset = ap->a_pl_offset;
 
-	if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
-		hfs_incr_gencount (cp);
-	}
-
 	/*
 	 * we can tell if we're getting the new or old behavior from the UPL
 	 */
@@ -4503,20 +4808,41 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
 	}
 
 	/*
-	 * If data was written, update the modification time of the file.
-	 * If setuid or setgid bits are set and this process is not the 
-	 * superuser then clear the setuid and setgid bits as a precaution 
-	 * against tampering.
+	 * If data was written, update the modification time of the file
+	 * but only if it's mapped writable; we will have touched the
+	 * modification time for direct writes.
 	 */
-	if (retval == 0) {
-		cp->c_touch_modtime = TRUE;
-		cp->c_touch_chgtime = TRUE;
-		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
-		    (vfs_context_suser(ap->a_context) != 0)) {
-			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-			cp->c_mode &= ~(S_ISUID | S_ISGID);
-			hfs_unlock(cp);
+	if (retval == 0 && (ubc_is_mapped_writable(vp)
+						|| ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
+		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+		// Check again with lock
+		bool mapped_writable = ubc_is_mapped_writable(vp);
+		if (mapped_writable
+			|| ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
+			cp->c_touch_modtime = TRUE;
+			cp->c_touch_chgtime = TRUE;
+
+			/*
+			 * We only need to increment the generation counter if
+			 * it's currently mapped writable because we incremented
+			 * the counter in hfs_vnop_mnomap.
+			 */
+			if (mapped_writable)
+				hfs_incr_gencount(VTOC(vp));
+
+			/*
+			 * If setuid or setgid bits are set and this process is
+			 * not the superuser then clear the setuid and setgid bits
+			 * as a precaution against tampering.
+			 */
+			if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+				(vfs_context_suser(ap->a_context) != 0)) {
+				cp->c_mode &= ~(S_ISUID | S_ISGID);
+			}
 		}
+
+		hfs_unlock(cp);
 	}
 
 pageout_done:
@@ -4972,7 +5298,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
 		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
 	} else {
 		/*
-		 * No need to call ubc_sync_range or hfs_invalbuf
+		 * No need to call ubc_msync or hfs_invalbuf
 		 * since the file was copied using IO_NOCACHE and
 		 * the copy was done starting and ending on a page
 		 * boundary in the file.