X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4452a7af2eac33dbad800bcc91f2399d62c18f53..d41d1dae2cd00cc08c7982087d1c445180cad9f5:/bsd/hfs/hfs_readwrite.c diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index b9c8bf912..9fcd6a02d 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,69 +44,55 @@ #include #include #include +#include #include #include +#include +#include #include #include +#include #include #include +#include + #include #include #include #include "hfs.h" +#include "hfs_attrlist.h" #include "hfs_endian.h" -#include "hfs_fsctl.h" +#include "hfs_fsctl.h" #include "hfs_quota.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" #include "hfs_cnode.h" #include "hfs_dbg.h" -extern int overflow_extents(struct filefork *fp); - #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2))) enum { MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */ }; -extern u_int32_t GetLogicalBlockSize(struct vnode *vp); - -extern int hfs_setextendedsecurity(struct hfsmount *, int); - +/* from bsd/hfs/hfs_vfsops.c */ +extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); static int hfs_clonefile(struct vnode *, int, int, int); static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); +static int hfs_minorupdate(struct vnode *vp); +static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); int flush_cache_on_write = 0; SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files"); -/***************************************************************************** -* -* I/O Operations on vnodes -* -*****************************************************************************/ -int hfs_vnop_read(struct vnop_read_args *); -int hfs_vnop_write(struct vnop_write_args *); -int hfs_vnop_ioctl(struct vnop_ioctl_args *); -int hfs_vnop_select(struct vnop_select_args *); -int hfs_vnop_blktooff(struct vnop_blktooff_args *); -int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); -int hfs_vnop_blockmap(struct vnop_blockmap_args *); -int hfs_vnop_strategy(struct vnop_strategy_args *); -int hfs_vnop_allocate(struct vnop_allocate_args *); -int hfs_vnop_pagein(struct vnop_pagein_args *); -int hfs_vnop_pageout(struct vnop_pageout_args *); -int hfs_vnop_bwrite(struct vnop_bwrite_args *); - - /* * Read data from a file. 
*/ @@ -124,7 +110,6 @@ hfs_vnop_read(struct vnop_read_args *ap) off_t offset = uio_offset(uio); int retval = 0; - /* Preflight checks */ if (!vnode_isreg(vp)) { /* can only read regular files */ @@ -137,6 +122,34 @@ hfs_vnop_read(struct vnop_read_args *ap) return (0); /* Nothing left to do */ if (offset < 0) return (EINVAL); /* cant read from a negative offset */ + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ + return 0; + } + /* otherwise read the resource fork normally */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (retval == 0) { + /* successful read, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return retval; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + retval = 0; + } + } +#endif /* HFS_COMPRESSION */ cp = VTOC(vp); fp = VTOF(vp); @@ -158,7 +171,7 @@ hfs_vnop_read(struct vnop_read_args *ap) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); - retval = cluster_read(vp, uio, filesize, 0); + retval = cluster_read(vp, uio, filesize, ap->a_ioflag); cp->c_touch_acctime = TRUE; @@ -168,7 +181,7 @@ hfs_vnop_read(struct vnop_read_args *ap) /* * Keep track blocks read */ - if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) { + if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) { int took_cnode_lock = 0; off_t bytesread; @@ -183,7 +196,7 @@ hfs_vnop_read(struct vnop_read_args *ap) * If this file hasn't been seen since the start of * the current sampling period then start over. */ - if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { + if (cp->c_atime < hfsmp->hfc_timebase) { struct timeval tv; fp->ff_bytesread = bytesread; @@ -196,7 +209,7 @@ hfs_vnop_read(struct vnop_read_args *ap) hfs_unlock(cp); } exit: - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, 0); return (retval); } @@ -214,21 +227,43 @@ hfs_vnop_write(struct vnop_write_args *ap) kauth_cred_t cred = NULL; off_t origFileSize; off_t writelimit; - off_t bytesToAdd; + off_t bytesToAdd = 0; off_t actualBytesAdded; off_t filebytes; off_t offset; - size_t resid; + ssize_t resid; int eflags; int ioflag = ap->a_ioflag; int retval = 0; int lockflags; int cnode_locked = 0; + int partialwrite = 0; + int exclusive_lock = 0; + +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return EACCES; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow writes */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } +#endif // LP64todo - fix this! uio_resid may be 64-bit value resid = uio_resid(uio); offset = uio_offset(uio); + if (ioflag & IO_APPEND) { + exclusive_lock = 1; + } + if (offset < 0) return (EINVAL); if (resid == 0) @@ -236,31 +271,11 @@ hfs_vnop_write(struct vnop_write_args *ap) if (!vnode_isreg(vp)) return (EPERM); /* Can only write regular files */ - /* Protect against a size change. 
*/ - hfs_lock_truncate(VTOC(vp), TRUE); - - if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(VTOC(vp)); - return (retval); - } - cnode_locked = 1; cp = VTOC(vp); fp = VTOF(vp); hfsmp = VTOHFS(vp); - filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; - if (ioflag & IO_APPEND) { - uio_setoffset(uio, fp->ff_size); - offset = fp->ff_size; - } - if ((cp->c_flags & APPEND) && offset != fp->ff_size) { - retval = EPERM; - goto exit; - } - - origFileSize = fp->ff_size; eflags = kEFDeferMask; /* defer file block allocations */ - #ifdef HFS_SPARSE_DEV /* * When the underlying device is sparse and space @@ -274,19 +289,59 @@ hfs_vnop_write(struct vnop_write_args *ap) } #endif /* HFS_SPARSE_DEV */ - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, - (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); +again: + /* Protect against a size change. */ + hfs_lock_truncate(cp, exclusive_lock); - /* Now test if we need to extend the file */ - /* Doing so will adjust the filebytes for us */ + if (ioflag & IO_APPEND) { + uio_setoffset(uio, fp->ff_size); + offset = fp->ff_size; + } + if ((cp->c_flags & APPEND) && offset != fp->ff_size) { + retval = EPERM; + goto exit; + } + origFileSize = fp->ff_size; writelimit = offset + resid; - if (writelimit <= filebytes) + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + + /* If the truncate lock is shared, and if we either have virtual + * blocks or will need to extend the file, upgrade the truncate + * to exclusive lock. If upgrade fails, we lose the lock and + * have to get exclusive lock again. Note that we want to + * grab the truncate lock exclusive even if we're not allocating new blocks + * because we could still be growing past the LEOF. + */ + if ((exclusive_lock == 0) && + ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) { + exclusive_lock = 1; + /* Lock upgrade failed and we lost our shared lock, try again */ + if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { + goto again; + } + } + + if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + goto exit; + } + cnode_locked = 1; + + if (!exclusive_lock) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, + (int)offset, uio_resid(uio), (int)fp->ff_size, + (int)filebytes, 0); + } + + /* Check if we do not need to extend the file */ + if (writelimit <= filebytes) { goto sizeok; + } cred = vfs_context_ucred(ap->a_context); -#if QUOTA bytesToAdd = writelimit - filebytes; + +#if QUOTA retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)), cred, 0); if (retval) @@ -330,6 +385,17 @@ hfs_vnop_write(struct vnop_write_args *ap) (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); (void) hfs_end_transaction(hfsmp); + /* + * If we didn't grow the file enough try a partial write. + * POSIX expects this behavior. 
+ */ + if ((retval == ENOSPC) && (filebytes > offset)) { + retval = 0; + partialwrite = 1; + uio_setresid(uio, (uio_resid(uio) - bytesToAdd)); + resid -= bytesToAdd; + writelimit = filebytes; + } sizeok: if (retval == E_NONE) { off_t filesize; @@ -346,7 +412,7 @@ sizeok: else filesize = fp->ff_size; - lflag = (ioflag & IO_SYNC); + lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY); if (offset <= fp->ff_size) { zero_off = offset & ~PAGE_MASK_64; @@ -460,30 +526,67 @@ sizeok: hfs_unlock(cp); cnode_locked = 0; + + /* + * We need to tell UBC the fork's new size BEFORE calling + * cluster_write, in case any of the new pages need to be + * paged out before cluster_write completes (which does happen + * in embedded systems due to extreme memory pressure). + * Similarly, we need to tell hfs_vnop_pageout what the new EOF + * will be, so that it can pass that on to cluster_pageout, and + * allow those pageouts. + * + * We don't update ff_size yet since we don't want pageins to + * be able to see uninitialized data between the old and new + * EOF, until cluster_write has completed and initialized that + * part of the file. + * + * The vnode pager relies on the file size last given to UBC via + * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or + * ff_size (whichever is larger). NOTE: ff_new_size is always + * zero, unless we are extending the file via write. + */ + if (filesize > fp->ff_size) { + fp->ff_new_size = filesize; + ubc_setsize(vp, filesize); + } retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off, tail_off, lflag | IO_NOZERODIRTY); - offset = uio_offset(uio); - if (offset > fp->ff_size) { - fp->ff_size = offset; - - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ + if (retval) { + fp->ff_new_size = 0; /* no longer extending; use ff_size */ + if (filesize > origFileSize) { + ubc_setsize(vp, origFileSize); + } + goto ioerr_exit; + } + + if (filesize > origFileSize) { + fp->ff_size = filesize; + /* Files that are changing size are not hot file candidates. */ - if (hfsmp->hfc_stage == HFC_RECORDING) + if (hfsmp->hfc_stage == HFC_RECORDING) { fp->ff_bytesread = 0; + } } + fp->ff_new_size = 0; /* ff_size now has the correct size */ + + /* If we wrote some bytes, then touch the change and mod times */ if (resid > uio_resid(uio)) { cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; } } + if (partialwrite) { + uio_setresid(uio, (uio_resid(uio) + bytesToAdd)); + resid += bytesToAdd; + } - // XXXdbg - testing for vivek and paul lambert + // XXXdbg - see radar 4871353 for more info { if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) { VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); } } - HFS_KNOTE(vp, NOTE_WRITE); ioerr_exit: /* @@ -508,7 +611,7 @@ ioerr_exit: cnode_locked = 1; } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, - 0, ap->a_context); + 0, 0, ap->a_context); // LP64todo - fix this! 
resid needs to by user_ssize_t uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); @@ -529,31 +632,22 @@ ioerr_exit: exit: if (cnode_locked) hfs_unlock(cp); - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, exclusive_lock); return (retval); } /* support for the "bulk-access" fcntl */ -#define CACHE_ELEMS 64 #define CACHE_LEVELS 16 +#define NUM_CACHE_ENTRIES (64*16) #define PARENT_IDS_FLAG 0x100 -/* from hfs_attrlist.c */ -extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, - mode_t obj_mode, struct mount *mp, - kauth_cred_t cred, struct proc *p); - -/* from vfs/vfs_fsevents.c */ -extern char *get_pathbuff(void); -extern void release_pathbuff(char *buff); - struct access_cache { int numcached; int cachehits; /* these two for statistics gathering */ int lookups; unsigned int *acache; - Boolean *haveaccess; + unsigned char *haveaccess; }; struct access_t { @@ -564,80 +658,142 @@ struct access_t { int *file_ids; /* IN: array of file ids */ gid_t *groups; /* IN: array of groups */ short *access; /* OUT: access info for each file (0 for 'has access') */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user32_addr_t file_ids; /* IN: array of file ids */ + user32_addr_t groups; /* IN: array of groups */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ }; -struct user_access_t { +struct user64_access_t { uid_t uid; /* IN: effective user id */ short flags; /* IN: access requested (i.e. R_OK) */ short num_groups; /* IN: number of groups user belongs to */ - int num_files; /* IN: number of files to process */ - user_addr_t file_ids; /* IN: array of file ids */ - user_addr_t groups; /* IN: array of groups */ - user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + int num_files; /* IN: number of files to process */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t groups; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ +}; + + +// these are the "extended" versions of the above structures +// note that it is crucial that they be different sized than +// the regular version +struct ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + uint32_t *file_ids; /* IN: Array of file ids */ + char *bitmap; /* OUT: hash-bitmap of interesting directory ids */ + short *access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + cnid_t *parents; /* future use */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. 
R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user32_addr_t file_ids; /* IN: Array of file ids */ + user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + user32_addr_t parents; /* future use */ }; +struct user64_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t bitmap; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents;/* future use */ + user64_addr_t parents;/* future use */ +}; + + /* * Perform a binary search for the given parent_id. Return value is - * found/not found boolean, and indexp will be the index of the item - * or the index at which to insert the item if it's not found. + * the index if there is a match. If no_match_indexp is non-NULL it + * will be assigned with the index to insert the item (even if it was + * not found). */ -static int -lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) +static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp) { - unsigned int lo, hi; - int index, matches = 0; + int index=-1; + unsigned int lo=0; - if (cache->numcached == 0) { - *indexp = 0; - return 0; // table is empty, so insert at index=0 and report no match + do { + unsigned int mid = ((hi - lo)/2) + lo; + unsigned int this_id = array[mid]; + + if (parent_id == this_id) { + hi = mid; + break; } - - if (cache->numcached > CACHE_ELEMS) { - /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n", - cache->numcached, CACHE_ELEMS);*/ - cache->numcached = CACHE_ELEMS; + + if (parent_id < this_id) { + hi = mid; + continue; + } + + if (parent_id > this_id) { + lo = mid + 1; + continue; } + } while(lo < hi); + + /* check if lo and hi converged on the match */ + if (parent_id == array[hi]) { + index = hi; + } - lo = 0; - hi = cache->numcached - 1; - index = -1; + if (no_match_indexp) { + *no_match_indexp = hi; + } + + return index; +} + + +static int +lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) +{ + unsigned int hi; + int matches = 0; + int index, no_match_index; - /* perform binary search for parent_id */ - do { - unsigned int mid = (hi - lo)/2 + lo; - unsigned int this_id = cache->acache[mid]; - - if (parent_id == this_id) { - index = mid; - break; - } - - if (parent_id < this_id) { - hi = mid; - continue; - } - - if (parent_id > this_id) { - lo = mid + 1; - continue; - } - } while(lo < hi); + if (cache->numcached == 0) { + *indexp = 0; + return 0; // table is empty, so insert at index=0 and report no match + } - /* check if lo and hi converged on the match */ - if (parent_id == cache->acache[hi]) { - index = hi; - } + if (cache->numcached > NUM_CACHE_ENTRIES) { + /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n", + cache->numcached, NUM_CACHE_ENTRIES);*/ + cache->numcached = NUM_CACHE_ENTRIES; + } - /* if no existing entry found, find index for new one */ - if (index == -1) { - index = (parent_id < cache->acache[hi]) ? 
hi : hi + 1; - matches = 0; - } else { - matches = 1; - } + hi = cache->numcached - 1; - *indexp = index; - return matches; + index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index); + + /* if no existing entry found, find index for new one */ + if (index == -1) { + index = no_match_index; + matches = 0; + } else { + matches = 1; + } + + *indexp = index; + return matches; } /* @@ -648,63 +804,71 @@ lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) static void add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) { - int lookup_index = -1; - - /* need to do a lookup first if -1 passed for index */ - if (index == -1) { - if (lookup_bucket(cache, &lookup_index, nodeID)) { - if (cache->haveaccess[lookup_index] != access) { - /* change access info for existing entry... should never happen */ - cache->haveaccess[lookup_index] = access; - } - - /* mission accomplished */ - return; - } else { - index = lookup_index; - } - - } - - /* if the cache is full, do a replace rather than an insert */ - if (cache->numcached >= CACHE_ELEMS) { - //printf("cache is full (%d). replace at index %d\n", cache->numcached, index); - cache->numcached = CACHE_ELEMS-1; - - if (index > cache->numcached) { - // printf("index %d pinned to %d\n", index, cache->numcached); - index = cache->numcached; - } - } else if (index >= 0 && index < cache->numcached) { - /* only do bcopy if we're inserting */ - bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) ); - bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) ); - } - - cache->acache[index] = nodeID; - cache->haveaccess[index] = access; - cache->numcached++; + int lookup_index = -1; + + /* need to do a lookup first if -1 passed for index */ + if (index == -1) { + if (lookup_bucket(cache, &lookup_index, nodeID)) { + if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) { + // only update an entry if the previous access was ESRCH (i.e. a scope checking error) + cache->haveaccess[lookup_index] = access; + } + + /* mission accomplished */ + return; + } else { + index = lookup_index; + } + + } + + /* if the cache is full, do a replace rather than an insert */ + if (cache->numcached >= NUM_CACHE_ENTRIES) { + //printf("hfs: cache is full (%d). 
replace at index %d\n", cache->numcached, index); + cache->numcached = NUM_CACHE_ENTRIES-1; + + if (index > cache->numcached) { + // printf("hfs: index %d pinned to %d\n", index, cache->numcached); + index = cache->numcached; + } + } + + if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) { + index++; + } + + if (index >= 0 && index < cache->numcached) { + /* only do bcopy if we're inserting */ + bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) ); + bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) ); + } + + cache->acache[index] = nodeID; + cache->haveaccess[index] = access; + cache->numcached++; } struct cinfo { - uid_t uid; - gid_t gid; - mode_t mode; - cnid_t parentcnid; + uid_t uid; + gid_t gid; + mode_t mode; + cnid_t parentcnid; + u_int16_t recflags; }; static int snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg) { - struct cinfo *cip = (struct cinfo *)arg; + struct cinfo *cip = (struct cinfo *)arg; - cip->uid = attrp->ca_uid; - cip->gid = attrp->ca_gid; - cip->mode = attrp->ca_mode; - cip->parentcnid = descp->cd_parentcnid; + cip->uid = attrp->ca_uid; + cip->gid = attrp->ca_gid; + cip->mode = attrp->ca_mode; + cip->parentcnid = descp->cd_parentcnid; + cip->recflags = attrp->ca_recflags; - return (0); + return (0); } /* @@ -712,135 +876,510 @@ snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * isn't incore, then go to the catalog. */ static int -do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid, - struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p) +do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, + struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp) { - int error = 0; - - /* if this id matches the one the fsctl was called with, skip the lookup */ - if (cnid == skip_cp->c_cnid) { - cnattrp->ca_uid = skip_cp->c_uid; - cnattrp->ca_gid = skip_cp->c_gid; - cnattrp->ca_mode = skip_cp->c_mode; - keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + int error = 0; + + /* if this id matches the one the fsctl was called with, skip the lookup */ + if (cnid == skip_cp->c_cnid) { + cnattrp->ca_uid = skip_cp->c_uid; + cnattrp->ca_gid = skip_cp->c_gid; + cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; + keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + } else { + struct cinfo c_info; + + /* otherwise, check the cnode hash incase the file/dir is incore */ + if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) { + cnattrp->ca_uid = c_info.uid; + cnattrp->ca_gid = c_info.gid; + cnattrp->ca_mode = c_info.mode; + cnattrp->ca_recflags = c_info.recflags; + keyp->hfsPlus.parentID = c_info.parentcnid; } else { - struct cinfo c_info; - - /* otherwise, check the cnode hash incase the file/dir is incore */ - if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) { - cnattrp->ca_uid = c_info.uid; - cnattrp->ca_gid = c_info.gid; - cnattrp->ca_mode = c_info.mode; - keyp->hfsPlus.parentID = c_info.parentcnid; - } else { - int lockflags; + int lockflags; - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* lookup this cnid in the catalog */ - error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); + /* lookup this cnid in the catalog */ + 
error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); - hfs_systemfile_unlock(hfsmp, lockflags); + hfs_systemfile_unlock(hfsmp, lockflags); - cache->lookups++; - } + cache->lookups++; } + } - return (error); + return (error); } + /* * Compute whether we have access to the given directory (nodeID) and all its parents. Cache * up to CACHE_LEVELS as we progress towards the root. */ static int do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, - struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev ) + struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, + struct vfs_context *my_context, + char *bitmap, + uint32_t map_size, + cnid_t* parents, + uint32_t num_parents) { - int myErr = 0; - int myResult; - HFSCatalogNodeID thisNodeID; - unsigned long myPerms; - struct cat_attr cnattr; - int cache_index = -1; - CatalogKey catkey; - - int i = 0, ids_to_cache = 0; - int parent_ids[CACHE_LEVELS]; - - /* root always has access */ - if (!suser(myp_ucred, NULL)) { - return (1); - } - - thisNodeID = nodeID; - while (thisNodeID >= kRootDirID) { - myResult = 0; /* default to "no access" */ - - /* check the cache before resorting to hitting the catalog */ - - /* ASSUMPTION: access info of cached entries is "final"... i.e. no need - * to look any further after hitting cached dir */ - - if (lookup_bucket(cache, &cache_index, thisNodeID)) { - cache->cachehits++; - myResult = cache->haveaccess[cache_index]; - goto ExitThisRoutine; - } - - /* remember which parents we want to cache */ - if (ids_to_cache < CACHE_LEVELS) { - parent_ids[ids_to_cache] = thisNodeID; - ids_to_cache++; - } + int myErr = 0; + int myResult; + HFSCatalogNodeID thisNodeID; + unsigned int myPerms; + struct cat_attr cnattr; + int cache_index = -1, scope_index = -1, scope_idx_start = -1; + CatalogKey catkey; + + int i = 0, ids_to_cache = 0; + int parent_ids[CACHE_LEVELS]; + + thisNodeID = nodeID; + while (thisNodeID >= kRootDirID) { + myResult = 0; /* default to "no access" */ + + /* check the cache before resorting to hitting the catalog */ + + /* ASSUMPTION: access info of cached entries is "final"... i.e. no need + * to look any further after hitting cached dir */ + + if (lookup_bucket(cache, &cache_index, thisNodeID)) { + cache->cachehits++; + myErr = cache->haveaccess[cache_index]; + if (scope_index != -1) { + if (myErr == ESRCH) { + myErr = 0; + } + } else { + scope_index = 0; // so we'll just use the cache result + scope_idx_start = ids_to_cache; + } + myResult = (myErr == 0) ? 1 : 0; + goto ExitThisRoutine; + } + + + if (parents) { + int tmp; + tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL); + if (scope_index == -1) + scope_index = tmp; + if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) { + scope_idx_start = ids_to_cache; + } + } + + /* remember which parents we want to cache */ + if (ids_to_cache < CACHE_LEVELS) { + parent_ids[ids_to_cache] = thisNodeID; + ids_to_cache++; + } + // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"... 
+ if (bitmap && map_size) { + bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7)); + } - /* do the lookup (checks the cnode hash, then the catalog) */ - myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr); - if (myErr) { - goto ExitThisRoutine; /* no access */ - } - - myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, - cnattr.ca_mode, hfsmp->hfs_mp, - myp_ucred, theProcPtr); - - if ( (myPerms & X_OK) == 0 ) { - myResult = 0; - goto ExitThisRoutine; /* no access */ - } - - /* up the hierarchy we go */ - thisNodeID = catkey.hfsPlus.parentID; - } - - /* if here, we have access to this node */ - myResult = 1; - - ExitThisRoutine: - if (myErr) { - //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID); - myResult = 0; - } - *err = myErr; - - /* cache the parent directory(ies) */ - for (i = 0; i < ids_to_cache; i++) { - /* small optimization: get rid of double-lookup for all these */ - // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult); - add_node(cache, -1, parent_ids[i], myResult); - } - - return (myResult); + + /* do the lookup (checks the cnode hash, then the catalog) */ + myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr); + if (myErr) { + goto ExitThisRoutine; /* no access */ + } + + /* Root always gets access. */ + if (suser(myp_ucred, NULL) == 0) { + thisNodeID = catkey.hfsPlus.parentID; + myResult = 1; + continue; + } + + // if the thing has acl's, do the full permission check + if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) { + struct vnode *vp; + + /* get the vnode for this cnid */ + myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0); + if ( myErr ) { + myResult = 0; + goto ExitThisRoutine; + } + + thisNodeID = VTOC(vp)->c_parentcnid; + + hfs_unlock(VTOC(vp)); + + if (vnode_vtype(vp) == VDIR) { + myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context); + } else { + myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context); + } + + vnode_put(vp); + if (myErr) { + myResult = 0; + goto ExitThisRoutine; + } + } else { + unsigned int flags; + + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, + cnattr.ca_mode, hfsmp->hfs_mp, + myp_ucred, theProcPtr); + + if (cnattr.ca_mode & S_IFDIR) { + flags = R_OK | X_OK; + } else { + flags = R_OK; + } + if ( (myPerms & flags) != flags) { + myResult = 0; + myErr = EACCES; + goto ExitThisRoutine; /* no access */ + } + + /* up the hierarchy we go */ + thisNodeID = catkey.hfsPlus.parentID; + } + } + + /* if here, we have access to this node */ + myResult = 1; + + ExitThisRoutine: + if (parents && myErr == 0 && scope_index == -1) { + myErr = ESRCH; + } + + if (myErr) { + myResult = 0; + } + *err = myErr; + + /* cache the parent directory(ies) */ + for (i = 0; i < ids_to_cache; i++) { + if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) { + add_node(cache, -1, parent_ids[i], ESRCH); + } else { + add_node(cache, -1, parent_ids[i], myErr); + } + } + + return (myResult); +} + +static int +do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, + struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context) +{ + boolean_t is64bit; + + /* + * NOTE: on entry, the vnode is locked. Incase this vnode + * happens to be in our list of file_ids, we'll note it + * avoid calling hfs_chashget_nowait() on that id as that + * will cause a "locking against myself" panic. 
+ */ + Boolean check_leaf = true; + + struct user64_ext_access_t *user_access_structp; + struct user64_ext_access_t tmp_user_access; + struct access_cache cache; + + int error = 0, prev_parent_check_ok=1; + unsigned int i; + + short flags; + unsigned int num_files = 0; + int map_size = 0; + int num_parents = 0; + int *file_ids=NULL; + short *access=NULL; + char *bitmap=NULL; + cnid_t *parents=NULL; + int leaf_index; + + cnid_t cnid; + cnid_t prevParent_cnid = 0; + unsigned int myPerms; + short myaccess = 0; + struct cat_attr cnattr; + CatalogKey catkey; + struct cnode *skip_cp = VTOC(vp); + kauth_cred_t cred = vfs_context_ucred(context); + proc_t p = vfs_context_proc(context); + + is64bit = proc_is64bit(p); + + /* initialize the local cache and buffers */ + cache.numcached = 0; + cache.cachehits = 0; + cache.lookups = 0; + cache.acache = NULL; + cache.haveaccess = NULL; + + /* struct copyin done during dispatch... need to copy file_id array separately */ + if (ap->a_data == NULL) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if (is64bit) { + if (arg_size != sizeof(struct user64_ext_access_t)) { + error = EINVAL; + goto err_exit_bulk_access; + } + + user_access_structp = (struct user64_ext_access_t *)ap->a_data; + + } else if (arg_size == sizeof(struct user32_access_t)) { + struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data; + + // convert an old style bulk-access struct to the new style + tmp_user_access.flags = accessp->flags; + tmp_user_access.num_files = accessp->num_files; + tmp_user_access.map_size = 0; + tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids); + tmp_user_access.bitmap = USER_ADDR_NULL; + tmp_user_access.access = CAST_USER_ADDR_T(accessp->access); + tmp_user_access.num_parents = 0; + user_access_structp = &tmp_user_access; + + } else if (arg_size == sizeof(struct user32_ext_access_t)) { + struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data; + + // up-cast from a 32-bit version of the struct + tmp_user_access.flags = accessp->flags; + tmp_user_access.num_files = accessp->num_files; + tmp_user_access.map_size = accessp->map_size; + tmp_user_access.num_parents = accessp->num_parents; + + tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids); + tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap); + tmp_user_access.access = CAST_USER_ADDR_T(accessp->access); + tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents); + + user_access_structp = &tmp_user_access; + } else { + error = EINVAL; + goto err_exit_bulk_access; + } + + map_size = user_access_structp->map_size; + + num_files = user_access_structp->num_files; + + num_parents= user_access_structp->num_parents; + + if (num_files < 1) { + goto err_exit_bulk_access; + } + if (num_files > 1024) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if (num_parents > 1024) { + error = EINVAL; + goto err_exit_bulk_access; + } + + file_ids = (int *) kalloc(sizeof(int) * num_files); + access = (short *) kalloc(sizeof(short) * num_files); + if (map_size) { + bitmap = (char *) kalloc(sizeof(char) * map_size); + } + + if (num_parents) { + parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents); + } + + cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES); + cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES); + + if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) { + if (file_ids) { + kfree(file_ids, sizeof(int) * 
num_files); + } + if (bitmap) { + kfree(bitmap, sizeof(char) * map_size); + } + if (access) { + kfree(access, sizeof(short) * num_files); + } + if (cache.acache) { + kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES); + } + if (cache.haveaccess) { + kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES); + } + if (parents) { + kfree(parents, sizeof(cnid_t) * num_parents); + } + return ENOMEM; + } + + // make sure the bitmap is zero'ed out... + if (bitmap) { + bzero(bitmap, (sizeof(char) * map_size)); + } + + if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids, + num_files * sizeof(int)))) { + goto err_exit_bulk_access; + } + + if (num_parents) { + if ((error = copyin(user_access_structp->parents, (caddr_t)parents, + num_parents * sizeof(cnid_t)))) { + goto err_exit_bulk_access; + } + } + + flags = user_access_structp->flags; + if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) { + flags = R_OK; + } + + /* check if we've been passed leaf node ids or parent ids */ + if (flags & PARENT_IDS_FLAG) { + check_leaf = false; + } + + /* Check access to each file_id passed in */ + for (i = 0; i < num_files; i++) { + leaf_index=-1; + cnid = (cnid_t) file_ids[i]; + + /* root always has access */ + if ((!parents) && (!suser(cred, NULL))) { + access[i] = 0; + continue; + } + + if (check_leaf) { + /* do the lookup (checks the cnode hash, then the catalog) */ + error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr); + if (error) { + access[i] = (short) error; + continue; + } + + if (parents) { + // Check if the leaf matches one of the parent scopes + leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL); + if (leaf_index >= 0 && parents[leaf_index] == cnid) + prev_parent_check_ok = 0; + else if (leaf_index >= 0) + prev_parent_check_ok = 1; + } + + // if the thing has acl's, do the full permission check + if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) { + struct vnode *cvp; + int myErr = 0; + /* get the vnode for this cnid */ + myErr = hfs_vget(hfsmp, cnid, &cvp, 0); + if ( myErr ) { + access[i] = myErr; + continue; + } + + hfs_unlock(VTOC(cvp)); + + if (vnode_vtype(cvp) == VDIR) { + myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context); + } else { + myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context); + } + + vnode_put(cvp); + if (myErr) { + access[i] = myErr; + continue; + } + } else { + /* before calling CheckAccess(), check the target file for read access */ + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, + cnattr.ca_mode, hfsmp->hfs_mp, cred, p); + + /* fail fast if no access */ + if ((myPerms & flags) == 0) { + access[i] = EACCES; + continue; + } + } + } else { + /* we were passed an array of parent ids */ + catkey.hfsPlus.parentID = cnid; + } + + /* if the last guy had the same parent and had access, we're done */ + if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) { + cache.cachehits++; + access[i] = 0; + continue; + } + + myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, + skip_cp, p, cred, context,bitmap, map_size, parents, num_parents); + + if (myaccess || (error == ESRCH && leaf_index != -1)) { + access[i] = 0; // have access.. no errors to report + } else { + access[i] = (error != 0 ? 
(short) error : EACCES); + } + + prevParent_cnid = catkey.hfsPlus.parentID; + } + + /* copyout the access array */ + if ((error = copyout((caddr_t)access, user_access_structp->access, + num_files * sizeof (short)))) { + goto err_exit_bulk_access; + } + if (map_size && bitmap) { + if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap, + map_size * sizeof (char)))) { + goto err_exit_bulk_access; + } + } + + + err_exit_bulk_access: + + //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); + + if (file_ids) + kfree(file_ids, sizeof(int) * num_files); + if (parents) + kfree(parents, sizeof(cnid_t) * num_parents); + if (bitmap) + kfree(bitmap, sizeof(char) * map_size); + if (access) + kfree(access, sizeof(short) * num_files); + if (cache.acache) + kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES); + if (cache.haveaccess) + kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES); + + return (error); } -/* end "bulk-access" support */ +/* end "bulk-access" support */ + /* * Callback for use with freeze ioctl. */ static int -hfs_freezewrite_callback(struct vnode *vp, void *cargs) +hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) { vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze"); @@ -866,11 +1405,103 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { proc_t p = vfs_context_proc(context); struct vfsstatfs *vfsp; boolean_t is64bit; + off_t jnl_start, jnl_size; + struct hfs_journal_info *jip; +#if HFS_COMPRESSION + int compressed = 0; + off_t uncompressed_size = -1; + int decmpfs_error = 0; + + if (ap->a_command == F_RDADVISE) { + /* we need to inspect the decmpfs state of the file as early as possible */ + compressed = hfs_file_is_compressed(VTOC(vp), 0); + if (compressed) { + if (VNODE_IS_RSRC(vp)) { + /* if this is the resource fork, treat it as if it were empty */ + uncompressed_size = 0; + } else { + decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); + if (decmpfs_error != 0) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } + } + } + } +#endif /* HFS_COMPRESSION */ is64bit = proc_is64bit(p); switch (ap->a_command) { + case HFS_GETPATH: + { + struct vnode *file_vp; + cnid_t cnid; + int outlen; + char *bufptr; + int error; + + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); + } + /* Target vnode must be file system's root. */ + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + bufptr = (char *)ap->a_data; + cnid = strtoul(bufptr, NULL, 10); + + /* We need to call hfs_vfs_vget to leverage the code that will + * fix the origin list for us if needed, as opposed to calling + * hfs_vget, since we will need the parent for build_path call. + */ + + if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { + return (error); + } + error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context); + vnode_put(file_vp); + + return (error); + } + + case HFS_PREV_LINK: + case HFS_NEXT_LINK: + { + cnid_t linkfileid; + cnid_t nextlinkid; + cnid_t prevlinkid; + int error; + + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); + } + /* Target vnode must be file system's root. 
*/ + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + linkfileid = *(cnid_t *)ap->a_data; + if (linkfileid < kHFSFirstUserCatalogNodeID) { + return (EINVAL); + } + if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) { + return (error); + } + if (ap->a_command == HFS_NEXT_LINK) { + *(cnid_t *)ap->a_data = nextlinkid; + } else { + *(cnid_t *)ap->a_data = prevlinkid; + } + return (0); + } + case HFS_RESIZE_PROGRESS: { vfsp = vfs_statfs(HFSTOVFS(hfsmp)); @@ -881,8 +1512,14 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + /* file system must not be mounted read-only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); } + case HFS_RESIZE_VOLUME: { u_int64_t newsize; u_int64_t cursize; @@ -895,6 +1532,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + + /* filesystem must not be mounted read only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } newsize = *(u_int64_t *)ap->a_data; cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; @@ -907,6 +1549,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } } case HFS_CHANGE_NEXT_ALLOCATION: { + int error = 0; /* Assume success */ u_int32_t location; if (vnode_vfsisrdonly(vp)) { @@ -920,17 +1563,32 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + HFS_MOUNT_LOCK(hfsmp, TRUE); location = *(u_int32_t *)ap->a_data; - if (location > hfsmp->totalBlocks - 1) { - return (EINVAL); + if ((location >= hfsmp->allocLimit) && + (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) { + error = EINVAL; + goto fail_change_next_allocation; } /* Return previous value. */ *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; - HFS_MOUNT_LOCK(hfsmp, TRUE); - hfsmp->nextAllocation = location; - hfsmp->vcbFlags |= 0xFF00; + if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) { + /* On magic value for location, set nextAllocation to next block + * after metadata zone and set flag in mount structure to indicate + * that nextAllocation should not be updated again. + */ + if (hfsmp->hfs_metazone_end != 0) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } + hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; + } else { + hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location); + } + MarkVCBDirty(hfsmp); +fail_change_next_allocation: HFS_MOUNT_UNLOCK(hfsmp, TRUE); - return (0); + return (error); } #ifdef HFS_SPARSE_DEV @@ -940,6 +1598,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { struct hfs_backingstoreinfo *bsdata; int error = 0; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { return (EALREADY); } @@ -984,6 +1645,27 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; hfsmp->hfs_sparsebandblks *= 4; + vfs_markdependency(hfsmp->hfs_mp); + + /* + * If the sparse image is on a sparse image file (as opposed to a sparse + * bundle), then we may need to limit the free space to the maximum size + * of a file on that volume. So we query (using pathconf), and if we get + * a meaningful result, we cache the number of blocks for later use in + * hfs_freeblks(). 
+ */ + hfsmp->hfs_backingfs_maxblocks = 0; + if (vnode_vtype(di_vp) == VREG) { + int terr; + int hostbits; + terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); + if (terr == 0 && hostbits != 0 && hostbits < 64) { + u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; + + hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; + } + } + (void)vnode_put(di_vp); file_drop(bsdata->backingfd); return (0); @@ -996,6 +1678,10 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { kauth_cred_getuid(cred) != vfsp->f_owner) { return (EACCES); /* must be owner of file system */ } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { @@ -1011,28 +1697,30 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { case F_FREEZE_FS: { struct mount *mp; - task_t task; - if (!is_suser()) - return (EACCES); - mp = vnode_mount(vp); hfsmp = VFSTOHFS(mp); if (!(hfsmp->jnl)) return (ENOTSUP); + vfsp = vfs_statfs(mp); + + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) + return (EACCES); + lck_rw_lock_exclusive(&hfsmp->hfs_insync); - task = current_task(); - task_working_set_disable(task); - // flush things before we get started to try and prevent // dirty data from being paged out while we're frozen. // note: can't do this after taking the lock as it will // deadlock against ourselves. vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); hfs_global_exclusive_lock_acquire(hfsmp); + + // DO NOT call hfs_journal_flush() because that takes a + // shared lock on the global exclusive lock! journal_flush(hfsmp->jnl); // don't need to iterate on all vnodes, we just need to @@ -1053,7 +1741,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } case F_THAW_FS: { - if (!is_suser()) + vfsp = vfs_statfs(vnode_mount(vp)); + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) return (EACCES); // if we're not the one who froze the fs then we @@ -1072,245 +1762,37 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { return (0); } -#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t) -#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS) - - case HFS_BULKACCESS_FSCTL: - case HFS_BULKACCESS: { - /* - * NOTE: on entry, the vnode is locked. Incase this vnode - * happens to be in our list of file_ids, we'll note it - * avoid calling hfs_chashget_nowait() on that id as that - * will cause a "locking against myself" panic. 
- */ - Boolean check_leaf = true; - - struct user_access_t *user_access_structp; - struct user_access_t tmp_user_access_t; - struct access_cache cache; - - int error = 0, i; - - dev_t dev = VTOC(vp)->c_dev; - - short flags; - struct ucred myucred; - int num_files; - int *file_ids = NULL; - short *access = NULL; - - cnid_t cnid; - cnid_t prevParent_cnid = 0; - unsigned long myPerms; - short myaccess = 0; - struct cat_attr cnattr; - CatalogKey catkey; - struct cnode *skip_cp = VTOC(vp); - struct vfs_context my_context; - - /* set up front for common exit code */ - my_context.vc_ucred = NOCRED; - - /* first, return error if not run as root */ - if (cred->cr_ruid != 0) { - return EPERM; - } - - /* initialize the local cache and buffers */ - cache.numcached = 0; - cache.cachehits = 0; - cache.lookups = 0; - - file_ids = (int *) get_pathbuff(); - access = (short *) get_pathbuff(); - cache.acache = (int *) get_pathbuff(); - cache.haveaccess = (Boolean *) get_pathbuff(); - - if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) { - release_pathbuff((char *) file_ids); - release_pathbuff((char *) access); - release_pathbuff((char *) cache.acache); - release_pathbuff((char *) cache.haveaccess); - - return ENOMEM; - } - - /* struct copyin done during dispatch... need to copy file_id array separately */ - if (ap->a_data == NULL) { - error = EINVAL; - goto err_exit_bulk_access; - } - - if (is64bit) { - user_access_structp = (struct user_access_t *)ap->a_data; - } - else { - struct access_t * accessp = (struct access_t *)ap->a_data; - tmp_user_access_t.uid = accessp->uid; - tmp_user_access_t.flags = accessp->flags; - tmp_user_access_t.num_groups = accessp->num_groups; - tmp_user_access_t.num_files = accessp->num_files; - tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids); - tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups); - tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access); - user_access_structp = &tmp_user_access_t; - } - - num_files = user_access_structp->num_files; - if (num_files < 1) { - goto err_exit_bulk_access; - } - if (num_files > 256) { - error = EINVAL; - goto err_exit_bulk_access; - } - - if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids, - num_files * sizeof(int)))) { - goto err_exit_bulk_access; - } - - /* fill in the ucred structure */ - flags = user_access_structp->flags; - if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) { - flags = R_OK; - } - - /* check if we've been passed leaf node ids or parent ids */ - if (flags & PARENT_IDS_FLAG) { - check_leaf = false; - } - - /* - * Create a templated credential; this credential may *NOT* - * be used unless instantiated with a kauth_cred_create(); - * there must be a correcponding kauth_cred_unref() when it - * is no longer in use (i.e. before it goes out of scope). 
- */ - memset(&myucred, 0, sizeof(myucred)); - myucred.cr_ref = 1; - myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid; - myucred.cr_ngroups = user_access_structp->num_groups; - if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) { - myucred.cr_ngroups = 0; - } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups, - myucred.cr_ngroups * sizeof(gid_t)))) { - goto err_exit_bulk_access; - } - myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0]; - myucred.cr_gmuid = myucred.cr_uid; - - my_context.vc_proc = p; - my_context.vc_ucred = kauth_cred_create(&myucred); + case HFS_BULKACCESS_FSCTL: { + int size; + + if (hfsmp->hfs_flags & HFS_STANDARD) { + return EINVAL; + } - /* Check access to each file_id passed in */ - for (i = 0; i < num_files; i++) { -#if 0 - cnid = (cnid_t) file_ids[i]; - - /* root always has access */ - if (!suser(my_context.vc_ucred, NULL)) { - access[i] = 0; - continue; - } - - if (check_leaf) { - - /* do the lookup (checks the cnode hash, then the catalog) */ - error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p); - if (error) { - access[i] = (short) error; - continue; - } - - /* before calling CheckAccess(), check the target file for read access */ - myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, - cnattr.ca_mode, hfsmp->hfs_mp, my_context.vc_ucred, p ); - - - /* fail fast if no access */ - if ((myPerms & flags) == 0) { - access[i] = EACCES; - continue; - } - } else { - /* we were passed an array of parent ids */ - catkey.hfsPlus.parentID = cnid; - } - - /* if the last guy had the same parent and had access, we're done */ - if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) { - cache.cachehits++; - access[i] = 0; - continue; - } - - myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, - skip_cp, p, my_context.vc_ucred, dev); - - if ( myaccess ) { - access[i] = 0; // have access.. no errors to report - } else { - access[i] = (error != 0 ? (short) error : EACCES); - } - - prevParent_cnid = catkey.hfsPlus.parentID; -#else - int myErr; - - cnid = (cnid_t)file_ids[i]; - - while (cnid >= kRootDirID) { - /* get the vnode for this cnid */ - myErr = hfs_vget(hfsmp, cnid, &vp, 0); - if ( myErr ) { - access[i] = EACCES; - break; - } + if (is64bit) { + size = sizeof(struct user64_access_t); + } else { + size = sizeof(struct user32_access_t); + } + + return do_bulk_access_check(hfsmp, vp, ap, size, context); + } - cnid = VTOC(vp)->c_parentcnid; + case HFS_EXT_BULKACCESS_FSCTL: { + int size; + + if (hfsmp->hfs_flags & HFS_STANDARD) { + return EINVAL; + } - hfs_unlock(VTOC(vp)); - if (vnode_vtype(vp) == VDIR) { - /* - * XXX This code assumes that none of the - * XXX callbacks from vnode_authorize() will - * XXX take a persistent ref on the context - * XXX credential, which is a bad assumption. 
- */ - myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context); - } else { - myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context); - } - vnode_put(vp); - access[i] = myErr; - if (myErr) { - break; - } - } -#endif - } - - /* copyout the access array */ - if ((error = copyout((caddr_t)access, user_access_structp->access, - num_files * sizeof (short)))) { - goto err_exit_bulk_access; - } - - err_exit_bulk_access: - - //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); - - release_pathbuff((char *) cache.acache); - release_pathbuff((char *) cache.haveaccess); - release_pathbuff((char *) file_ids); - release_pathbuff((char *) access); - /* clean up local context, if needed */ - if (IS_VALID_CRED(my_context.vc_ucred)) - kauth_cred_unref(&my_context.vc_ucred); - - return (error); - } /* HFS_BULKACCESS */ + if (is64bit) { + size = sizeof(struct user64_ext_access_t); + } else { + size = sizeof(struct user32_ext_access_t); + } + + return do_bulk_access_check(hfsmp, vp, ap, size, context); + } case HFS_SETACLSTATE: { int state; @@ -1322,23 +1804,54 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { vfsp = vfs_statfs(HFSTOVFS(hfsmp)); state = *(int *)ap->a_data; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } // super-user can enable or disable acl's on a volume. // the volume owner can only enable acl's if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) { return (EPERM); } if (state == 0 || state == 1) - return hfs_setextendedsecurity(hfsmp, state); + return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state); + else + return (EINVAL); + } + + case HFS_SET_XATTREXTENTS_STATE: { + int state; + + if (ap->a_data == NULL) { + return (EINVAL); + } + + state = *(int *)ap->a_data; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + + /* Super-user can enable or disable extent-based extended + * attribute support on a volume + */ + if (!is_suser()) { + return (EPERM); + } + if (state == 0 || state == 1) + return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state); else return (EINVAL); } case F_FULLFSYNC: { int error; - + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); if (error == 0) { - error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p); + error = hfs_fsync(vp, MNT_WAIT, TRUE, p); hfs_unlock(VTOC(vp)); } @@ -1380,13 +1893,24 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { /* Protect against a size change. 
*/ hfs_lock_truncate(VTOC(vp), TRUE); +#if HFS_COMPRESSION + if (compressed && (uncompressed_size == -1)) { + /* fetching the uncompressed size failed above, so return the error */ + error = decmpfs_error; + } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || + (!compressed && (ra->ra_offset >= fp->ff_size))) { + error = EFBIG; + } +#else /* HFS_COMPRESSION */ if (ra->ra_offset >= fp->ff_size) { error = EFBIG; - } else { + } +#endif /* HFS_COMPRESSION */ + else { error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); } - hfs_unlock_truncate(VTOC(vp)); + hfs_unlock_truncate(VTOC(vp), TRUE); return (error); } @@ -1399,8 +1923,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { int error; uio_t auio; daddr64_t blockNumber; - u_long blockOffset; - u_long xfersize; + u_int32_t blockOffset; + u_int32_t xfersize; struct buf *bp; user_fbootstraptransfer_t user_bootstrap; @@ -1410,16 +1934,25 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { * to a user_fbootstraptransfer_t else we get a pointer to a * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (is64bit) { user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data; } else { - fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data; + user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data; user_bootstrapp = &user_bootstrap; user_bootstrap.fbt_offset = bootstrapp->fbt_offset; user_bootstrap.fbt_length = bootstrapp->fbt_length; user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer); } + + if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) || + (user_bootstrapp->fbt_length > 1024)) { + return EINVAL; + } + if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) return EINVAL; @@ -1468,17 +2001,47 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } else { - *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate); + *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } return 0; } - case HFS_GET_MOUNT_TIME: - return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time)); + case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; break; - case HFS_GET_LAST_MTIME: - return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime)); + case SPOTLIGHT_FSCTL_GET_LAST_MTIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; + break; + + case HFS_FSCTL_SET_VERY_LOW_DISK: + if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_LOW_DISK: + if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel + || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { + + return EINVAL; + } + + hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_DESIRED_DISK: + if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; + break; + + case HFS_VOLUME_STATUS: + *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; break; case HFS_SET_BOOT_INFO: @@ -1486,6 +2049,9 @@ 
hfs_vnop_ioctl( struct vnop_ioctl_args /* { return(EINVAL); if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) return(EACCES); /* must be superuser or owner of filesystem */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } HFS_MOUNT_LOCK(hfsmp, TRUE); bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); HFS_MOUNT_UNLOCK(hfsmp, TRUE); @@ -1500,11 +2066,60 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { HFS_MOUNT_UNLOCK(hfsmp, TRUE); break; + case HFS_MARK_BOOT_CORRUPT: + /* Mark the boot volume corrupt by setting + * kHFSVolumeInconsistentBit in the volume header. This will + * force fsck_hfs on next mount. + */ + if (!is_suser()) { + return EACCES; + } + + /* Allowed only on the root vnode of the boot volume */ + if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || + !vnode_isvroot(vp)) { + return EINVAL; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); + hfs_mark_volume_inconsistent(hfsmp); + break; + + case HFS_FSCTL_GET_JOURNAL_INFO: + jip = (struct hfs_journal_info*)ap->a_data; + + if (vp == NULLVP) + return EINVAL; + + if (hfsmp->jnl == NULL) { + jnl_start = 0; + jnl_size = 0; + } else { + jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; + jnl_size = (off_t)hfsmp->jnl_size; + } + + jip->jstart = jnl_start; + jip->jsize = jnl_size; + break; + + case HFS_SET_ALWAYS_ZEROFILL: { + struct cnode *cp = VTOC(vp); + + if (*(int *)ap->a_data) { + cp->c_flag |= C_ALWAYS_ZEROFILL; + } else { + cp->c_flag &= ~C_ALWAYS_ZEROFILL; + } + break; + } + default: return (ENOTTY); } - /* Should never get here */ return 0; } @@ -1536,13 +2151,12 @@ hfs_vnop_select(__unused struct vnop_select_args *ap) * The block run is returned in logical blocks, and is the REMAINING amount of blocks */ int -hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp) +hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp) { - struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); struct hfsmount *hfsmp = VTOHFS(vp); int retval = E_NONE; - daddr_t logBlockSize; + u_int32_t logBlockSize; size_t bytesContAvail = 0; off_t blockposition; int lockExtBtree; @@ -1553,17 +2167,17 @@ hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int * * to physical mapping is requested. */ if (vpp != NULL) - *vpp = cp->c_devvp; + *vpp = hfsmp->hfs_devvp; if (bnp == NULL) return (0); logBlockSize = GetLogicalBlockSize(vp); - blockposition = (off_t)bn * (off_t)logBlockSize; + blockposition = (off_t)bn * logBlockSize; lockExtBtree = overflow_extents(fp); if (lockExtBtree) - lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); retval = MacToVFSError( MapFileBlockC (HFSTOVCB(hfsmp), @@ -1633,6 +2247,15 @@ hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) /* * Map file offset to physical block number. * + * If this function is called for write operation, and if the file + * had virtual blocks allocated (delayed allocation), real blocks + * are allocated by calling ExtendFileC(). + * + * If this function is called for read operation, and if the file + * had virtual blocks allocated (delayed allocation), no change + * to the size of file is done, and if required, rangelist is + * searched for mapping. 
+ * * System file cnodes are expected to be locked (shared or exclusive). */ int @@ -1663,6 +2286,26 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) int started_tr = 0; int tooklock = 0; +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow blockmaps to the resource fork */ + } else { + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return ENOTSUP; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow blockmap */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } + } +#endif /* HFS_COMPRESSION */ + /* Do not allow blockmap operation on a directory */ if (vnode_isdir(vp)) { return (ENOTSUP); @@ -1675,14 +2318,10 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) if (ap->a_bpn == NULL) return (0); - if ( !vnode_issystem(vp) && !vnode_islnk(vp)) { + if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) { if (VTOC(vp)->c_lockowner != current_thread()) { hfs_lock(VTOC(vp), HFS_FORCE_LOCK); tooklock = 1; - } else { - cp = VTOC(vp); - panic("blockmap: %s cnode lock already held!\n", - cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : ""); } } hfsmp = VTOHFS(vp); @@ -1690,7 +2329,8 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) fp = VTOF(vp); retry: - if (fp->ff_unallocblocks) { + /* Check virtual blocks only when performing write operation */ + if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) { if (hfs_start_transaction(hfsmp) != 0) { retval = EINVAL; goto exit; @@ -1709,8 +2349,8 @@ retry: /* * Check for any delayed allocations. */ - if (fp->ff_unallocblocks) { - SInt64 actbytes; + if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) { + int64_t actbytes; u_int32_t loanedBlocks; // @@ -1746,9 +2386,7 @@ retry: HFS_MOUNT_LOCK(hfsmp, TRUE); hfsmp->loanedBlocks += loanedBlocks; HFS_MOUNT_UNLOCK(hfsmp, TRUE); - } - if (retval) { hfs_systemfile_unlock(hfsmp, lockflags); cp->c_flag |= C_MODIFIED; if (started_tr) { @@ -1756,6 +2394,7 @@ retry: (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); hfs_end_transaction(hfsmp); + started_tr = 0; } goto exit; } @@ -1775,10 +2414,63 @@ retry: started_tr = 0; } if (retval) { + /* On write, always return error because virtual blocks, if any, + * should have been allocated in ExtendFileC(). We do not + * allocate virtual blocks on read, therefore return error + * only if no virtual blocks are allocated. Otherwise we search + * rangelist for zero-fills + */ + if ((MacToVFSError(retval) != ERANGE) || + (ap->a_flags & VNODE_WRITE) || + ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) { + goto exit; + } + + /* Validate if the start offset is within logical file size */ + if (ap->a_foffset > fp->ff_size) { + goto exit; + } + + /* Searching file extents has failed for read operation, therefore + * search rangelist for any uncommitted holes in the file. 
+ */ + overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, + ap->a_foffset + (off_t)(ap->a_size - 1), + &invalid_range); + switch(overlaptype) { + case RL_OVERLAPISCONTAINED: + /* start_offset <= rl_start, end_offset >= rl_end */ + if (ap->a_foffset != invalid_range->rl_start) { + break; + } + case RL_MATCHINGOVERLAP: + /* start_offset = rl_start, end_offset = rl_end */ + case RL_OVERLAPCONTAINSRANGE: + /* start_offset >= rl_start, end_offset <= rl_end */ + case RL_OVERLAPSTARTSBEFORE: + /* start_offset > rl_start, end_offset >= rl_start */ + if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) { + bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset; + } else { + bytesContAvail = fp->ff_size - ap->a_foffset; + } + if (bytesContAvail > ap->a_size) { + bytesContAvail = ap->a_size; + } + *ap->a_bpn = (daddr64_t)-1; + retval = 0; + break; + case RL_OVERLAPENDSAFTER: + /* start_offset < rl_start, end_offset < rl_end */ + case RL_NOOVERLAP: + break; + } goto exit; } - /* Adjust the mapping information for invalid file ranges: */ + /* MapFileC() found a valid extent in the filefork. Search the + * mapping information further for invalid file ranges + */ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, ap->a_foffset + (off_t)bytesContAvail - 1, &invalid_range); @@ -1787,7 +2479,7 @@ retry: case RL_MATCHINGOVERLAP: case RL_OVERLAPCONTAINSRANGE: case RL_OVERLAPSTARTSBEFORE: - /* There's no valid block for this byte offset: */ + /* There's no valid block for this byte offset */ *ap->a_bpn = (daddr64_t)-1; /* There's no point limiting the amount to be returned * if the invalid range that was hit extends all the way @@ -1795,7 +2487,7 @@ retry: * end of this range and the file's EOF): */ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; } break; @@ -1807,7 +2499,7 @@ retry: /* There's actually no valid information to be had starting here: */ *ap->a_bpn = (daddr64_t)-1; if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; } } else { @@ -1820,13 +2512,17 @@ retry: } /* end switch */ if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size; + } + +exit: + if (retval == 0) { + if (ap->a_run) + *ap->a_run = bytesContAvail; + + if (ap->a_poff) + *(int *)ap->a_poff = 0; } - if (ap->a_run) - *ap->a_run = bytesContAvail; - if (ap->a_poff) - *(int *)ap->a_poff = 0; -exit: if (tooklock) hfs_unlock(cp); @@ -1845,14 +2541,23 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) { buf_t bp = ap->a_bp; vnode_t vp = buf_vnode(bp); - struct cnode *cp = VTOC(vp); - return (buf_strategy(cp->c_devvp, ap)); + return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap)); } +static int +hfs_minorupdate(struct vnode *vp) { + struct cnode *cp = VTOC(vp); + cp->c_flag &= ~C_MODIFIED; + cp->c_touch_acctime = 0; + cp->c_touch_chgtime = 0; + cp->c_touch_modtime = 0; + + return 0; +} static int -do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context) +do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context) { register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); @@ -1862,8 +2567,7 @@ do_hfs_truncate(struct 
vnode *vp, off_t length, int flags, int skipsetsize, vfs_ off_t bytesToAdd; off_t actualBytesAdded; off_t filebytes; - u_int64_t old_filesize; - u_long fileblocks; + u_int32_t fileblocks; int blksize; struct hfsmount *hfsmp; int lockflags; @@ -1871,7 +2575,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; - old_filesize = fp->ff_size; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START, (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); @@ -1922,7 +2625,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ */ if (length > filebytes) { int eflags; - u_long blockHint = 0; + u_int32_t blockHint = 0; /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; @@ -1970,8 +2673,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ hfs_systemfile_unlock(hfsmp, lockflags); if (hfsmp->jnl) { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (skipupdate) { + (void) hfs_minorupdate(vp); + } + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } } hfs_end_transaction(hfsmp); @@ -1984,7 +2692,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ } if (!(flags & IO_NOZEROFILL)) { - if (UBCINFOEXISTS(vp) && retval == E_NONE) { + if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { struct rl_entry *invalid_range; off_t zero_limit; @@ -2034,36 +2742,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ cp->c_touch_modtime = TRUE; fp->ff_size = length; - /* Nested transactions will do their own ubc_setsize. */ - if (!skipsetsize) { - /* - * ubc_setsize can cause a pagein here - * so we need to drop cnode lock. - */ - hfs_unlock(cp); - ubc_setsize(vp, length); - hfs_lock(cp, HFS_FORCE_LOCK); - } - } else { /* Shorten the size of the file */ if ((off_t)fp->ff_size > length) { - /* - * Any buffers that are past the truncation point need to be - * invalidated (to maintain buffer cache consistency). - */ - - /* Nested transactions will do their own ubc_setsize. */ - if (!skipsetsize) { - /* - * ubc_setsize can cause a pageout here - * so we need to drop cnode lock. 
- */ - hfs_unlock(cp); - ubc_setsize(vp, length); - hfs_lock(cp, HFS_FORCE_LOCK); - } - /* Any space previously marked as invalid is now irrelevant: */ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); } @@ -2129,10 +2810,14 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ if (retval == 0) { fp->ff_size = length; } - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (skipupdate) { + (void) hfs_minorupdate(vp); + } + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } } - hfs_end_transaction(hfsmp); filebytes = (off_t)fp->ff_blocks * (off_t)blksize; @@ -2144,12 +2829,24 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_ #endif /* QUOTA */ } /* Only set update flag if the logical length changes */ - if (old_filesize != length) + if ((off_t)fp->ff_size != length) cp->c_touch_modtime = TRUE; fp->ff_size = length; } - cp->c_touch_chgtime = TRUE; - retval = hfs_update(vp, MNT_WAIT); + if (cp->c_mode & (S_ISUID | S_ISGID)) { + if (!vfs_context_issuser(context)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + skipupdate = 0; + } + } + if (skipupdate) { + retval = hfs_minorupdate(vp); + } + else { + cp->c_touch_chgtime = TRUE; /* status changed */ + cp->c_touch_modtime = TRUE; /* file data was modified */ + retval = hfs_update(vp, MNT_WAIT); + } if (retval) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, -1, -1, -1, retval, 0); @@ -2172,21 +2869,46 @@ Err_Exit: __private_extern__ int hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, - vfs_context_t context) + int skipupdate, vfs_context_t context) { struct filefork *fp = VTOF(vp); off_t filebytes; - u_long fileblocks; + u_int32_t fileblocks; int blksize, error = 0; struct cnode *cp = VTOC(vp); - if (vnode_isdir(vp)) - return (EISDIR); /* cannot truncate an HFS directory! */ + /* Cannot truncate an HFS directory! */ + if (vnode_isdir(vp)) { + return (EISDIR); + } + /* A swap file cannot change size. */ + if (vnode_isswap(vp) && (length != 0)) { + return (EPERM); + } blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; + // + // Have to do this here so that we don't wind up with + // i/o pending for blocks that are about to be released + // if we truncate the file. + // + // If skipsetsize is set, then the caller is responsible + // for the ubc_setsize. + // + // Even if skipsetsize is set, if the length is zero we + // want to call ubc_setsize() because as of SnowLeopard + // it will no longer cause any page-ins and it will drop + // any dirty pages so that we don't do any i/o that we + // don't have to. This also prevents a race where i/o + // for truncated blocks may overwrite later data if the + // blocks get reallocated to a different file. + // + if (!skipsetsize || length == 0) + ubc_setsize(vp, length); + // have to loop truncating or growing files that are // really big because otherwise transactions can get // enormous and consume too many kernel resources. 
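
The comment just above explains why hfs_truncate() does not shrink or grow a very large file in one shot: each pass through do_hfs_truncate() is its own journal transaction, so the code walks toward the target length in bounded increments (the same idea the allocate path uses with HFS_BIGFILE_SIZE) to keep any single transaction small. Below is a minimal, compilable sketch of that chunking pattern only; CHUNK and truncate_step() are hypothetical stand-ins for the per-pass limit and do_hfs_truncate(), and the real loop additionally sets C_FORCEUPDATE on the cnode each pass.

/*
 * Sketch only, not part of the diff: walk toward the target length in
 * bounded steps so no single step does unbounded work.
 */
#include <stdio.h>
#include <stdint.h>

#define CHUNK ((int64_t)1 << 30)        /* hypothetical per-step limit */

static int
truncate_step(int64_t new_size)         /* stands in for one journaled step */
{
	printf("truncate to %lld\n", (long long)new_size);
	return 0;
}

static int
truncate_in_chunks(int64_t cur, int64_t length)
{
	int error = 0;

	while (cur != length && error == 0) {
		if (cur < length)               /* growing the file */
			cur = (length - cur > CHUNK) ? cur + CHUNK : length;
		else                            /* shrinking the file */
			cur = (cur - length > CHUNK) ? cur - CHUNK : length;
		error = truncate_step(cur);
	}
	return error;
}

int
main(void)
{
	return truncate_in_chunks((int64_t)5 << 30, 0);
}
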
@@ -2199,7 +2921,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context); + error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); if (error) break; } @@ -2211,13 +2933,13 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context); + error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); if (error) break; } } else /* Same logical size */ { - error = do_hfs_truncate(vp, length, flags, skipsetsize, context); + error = do_hfs_truncate(vp, length, flags, skipupdate, context); } /* Files that are changing size are not hot file candidates. */ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { @@ -2251,10 +2973,10 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { off_t moreBytesRequested; off_t actualBytesAdded; off_t filebytes; - u_long fileblocks; + u_int32_t fileblocks; int retval, retval2; - UInt32 blockHint; - UInt32 extendFlags; /* For call to ExtendFileC */ + u_int32_t blockHint; + u_int32_t extendFlags; /* For call to ExtendFileC */ struct hfsmount *hfsmp; kauth_cred_t cred = vfs_context_ucred(ap->a_context); int lockflags; @@ -2265,10 +2987,15 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { return (EISDIR); if (length < (off_t)0) return (EINVAL); - - if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) - return (retval); + cp = VTOC(vp); + + hfs_lock_truncate(cp, TRUE); + + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + goto Err_Exit; + } + fp = VTOF(vp); hfsmp = VTOHFS(vp); vcb = VTOVCB(vp); @@ -2290,6 +3017,8 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { extendFlags |= kEFAllMask; if (cred && suser(cred, NULL) != 0) extendFlags |= kEFReserveMask; + if (hfs_virtualmetafile(cp)) + extendFlags |= kEFMetadataMask; retval = E_NONE; blockHint = 0; @@ -2310,7 +3039,9 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { * value of filebytes is 0, length will be at least 1. */ if (length > filebytes) { - moreBytesRequested = length - filebytes; + off_t total_bytes_added = 0, orig_request_size; + + orig_request_size = moreBytesRequested = length - filebytes; #if QUOTA retval = hfs_chkdq(cp, @@ -2328,7 +3059,6 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { * Allocate Journal and Quota files in metadata zone. 
*/ if (hfs_virtualmetafile(cp)) { - extendFlags |= kEFMetadataMask; blockHint = hfsmp->hfs_metazone_start; } else if ((blockHint >= hfsmp->hfs_metazone_start) && (blockHint <= hfsmp->hfs_metazone_end)) { @@ -2339,35 +3069,60 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { } } - if (hfs_start_transaction(hfsmp) != 0) { - retval = EINVAL; - goto Err_Exit; - } - /* Protect extents b-tree and allocation bitmap */ - lockflags = SFL_BITMAP; - if (overflow_extents(fp)) + while ((length > filebytes) && (retval == E_NONE)) { + off_t bytesRequested; + + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } + + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) lockflags |= SFL_EXTENTS; - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + if (moreBytesRequested >= HFS_BIGFILE_SIZE) { + bytesRequested = HFS_BIGFILE_SIZE; + } else { + bytesRequested = moreBytesRequested; + } + + if (extendFlags & kEFContigMask) { + // if we're on a sparse device, this will force it to do a + // full scan to find the space needed. + hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN; + } - retval = MacToVFSError(ExtendFileC(vcb, + retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, - moreBytesRequested, + bytesRequested, blockHint, extendFlags, &actualBytesAdded)); - *(ap->a_bytesallocated) = actualBytesAdded; - filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - - hfs_systemfile_unlock(hfsmp, lockflags); + if (retval == E_NONE) { + *(ap->a_bytesallocated) += actualBytesAdded; + total_bytes_added += actualBytesAdded; + moreBytesRequested -= actualBytesAdded; + if (blockHint != 0) { + blockHint += actualBytesAdded / vcb->blockSize; + } + } + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + + hfs_systemfile_unlock(hfsmp, lockflags); - if (hfsmp->jnl) { + if (hfsmp->jnl) { (void) hfs_update(vp, TRUE); (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + + hfs_end_transaction(hfsmp); } - hfs_end_transaction(hfsmp); /* * if we get an error and no changes were made then exit @@ -2383,9 +3138,9 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { * until the file is closed, when we truncate the file to allocation * block size. 
*/ - if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded)) + if (total_bytes_added != 0 && orig_request_size < total_bytes_added) *(ap->a_bytesallocated) = - roundup(moreBytesRequested, (off_t)vcb->blockSize); + roundup(orig_request_size, (off_t)vcb->blockSize); } else { /* Shorten the size of the file */ @@ -2396,31 +3151,9 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { */ } - if (hfs_start_transaction(hfsmp) != 0) { - retval = EINVAL; - goto Err_Exit; - } - - /* Protect extents b-tree and allocation bitmap */ - lockflags = SFL_BITMAP; - if (overflow_extents(fp)) - lockflags |= SFL_EXTENTS; - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - - retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false)); - - hfs_systemfile_unlock(hfsmp, lockflags); - + retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - if (hfsmp->jnl) { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - } - - hfs_end_transaction(hfsmp); - - /* * if we get an error and no changes were made then exit * otherwise we must do the hfs_update to reflect the changes @@ -2448,6 +3181,7 @@ Std_Exit: if (retval == 0) retval = retval2; Err_Exit: + hfs_unlock_truncate(cp, TRUE); hfs_unlock(cp); return (retval); } @@ -2473,12 +3207,36 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) vnode_t vp = ap->a_vp; int error; +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (error == 0) { + /* successful page-in, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return error; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + } + } +#endif + error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags); /* * Keep track of blocks read. */ - if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { + if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { struct cnode *cp; struct filefork *fp; int bytesread; @@ -2493,7 +3251,7 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) bytesread = ap->a_size; /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ - if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { + if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) { hfs_lock(cp, HFS_FORCE_LOCK); took_cnode_lock = 1; } @@ -2537,53 +3295,269 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) vnode_t vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - int retval; - off_t end_of_range; + int retval = 0; off_t filesize; + upl_t upl; + upl_page_info_t* pl; + vm_offset_t a_pl_offset; + int a_flags; + int is_pageoutv2 = 0; + kern_return_t kret; cp = VTOC(vp); - if (cp->c_lockowner == current_thread()) { - panic("pageout: %s cnode lock already held!\n", - cp->c_desc.cd_nameptr ? 
cp->c_desc.cd_nameptr : ""); - } - if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { - if (!(ap->a_flags & UPL_NOCOMMIT)) { - ubc_upl_abort_range(ap->a_pl, - ap->a_pl_offset, - ap->a_size, - UPL_ABORT_FREE_ON_EMPTY); - } - return (retval); - } fp = VTOF(vp); - + + /* + * Figure out where the file ends, for pageout purposes. If + * ff_new_size > ff_size, then we're in the middle of extending the + * file via a write, so it is safe (and necessary) that we be able + * to pageout up to that point. + */ filesize = fp->ff_size; - end_of_range = ap->a_f_offset + ap->a_size - 1; + if (fp->ff_new_size > filesize) + filesize = fp->ff_new_size; - if (end_of_range >= filesize) { - end_of_range = (off_t)(filesize - 1); - } - if (ap->a_f_offset < filesize) { - rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); - cp->c_flag |= C_MODIFIED; /* leof is dirty */ + a_flags = ap->a_flags; + a_pl_offset = ap->a_pl_offset; + + /* + * we can tell if we're getting the new or old behavior from the UPL + */ + if ((upl = ap->a_pl) == NULL) { + int request_flags; + + is_pageoutv2 = 1; + /* + * we're in control of any UPL we commit + * make sure someone hasn't accidentally passed in UPL_NOCOMMIT + */ + a_flags &= ~UPL_NOCOMMIT; + a_pl_offset = 0; + + /* + * take truncate lock (shared) to guard against + * zero-fill thru fsync interfering, but only for v2 + */ + hfs_lock_truncate(cp, 0); + + if (a_flags & UPL_MSYNC) { + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + } + else { + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + } + kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); + + if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) { + retval = EINVAL; + goto pageout_done; + } } - hfs_unlock(cp); + /* + * from this point forward upl points at the UPL we're working with + * it was either passed in or we succesfully created it + */ + + /* + * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own + * UPL instead of relying on the UPL passed into us. We go ahead and do that here, + * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for + * N dirty ranges in the UPL. Note that this is almost a direct copy of the + * logic in vnode_pageout except that we need to do it after grabbing the truncate + * lock in HFS so that we don't lock invert ourselves. + * + * Note that we can still get into this function on behalf of the default pager with + * non-V2 behavior (swapfiles). However in that case, we did not grab locks above + * since fsync and other writing threads will grab the locks, then mark the + * relevant pages as busy. But the pageout codepath marks the pages as busy, + * and THEN would attempt to grab the truncate lock, which would result in deadlock. So + * we do not try to grab anything for the pre-V2 case, which should only be accessed + * by the paging/VM system. + */ + + if (is_pageoutv2) { + off_t f_offset; + int offset; + int isize; + int pg_index; + int error; + int error_ret = 0; + + isize = ap->a_size; + f_offset = ap->a_f_offset; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + } + + /* + * initialize the offset variables before we touch the UPL. 
+ * a_f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on. + * isize is the offset into the UPL of the last non-clean page. + */ + isize = ((pg_index + 1) * PAGE_SIZE); - retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, - ap->a_size, filesize, ap->a_flags); + offset = 0; + pg_index = 0; + + while (isize) { + int xsize; + int num_of_pages; + + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL. + * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + if ( !upl_dirty_page(pl, pg_index)) { + panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl); + } + + /* + * We know that we have at least one dirty page. + * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize) { + if ( !upl_dirty_page(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock; + + tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + /* + * we're in the v2 path, so we are the + * owner of the UPL... we may have already + * processed some of the UPL, so abort it + * from the current working offset to the + * end of the UPL + */ + ubc_upl_abort_range(upl, + offset, + ap->a_size - offset, + UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + tooklock = 1; + } + end_of_range = f_offset + xsize - 1; + + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (f_offset < filesize) { + rl_remove(f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } + if (tooklock) { + hfs_unlock(cp); + } + } + if ((error = cluster_pageout(vp, upl, offset, f_offset, + xsize, filesize, a_flags))) { + if (error_ret == 0) + error_ret = error; + } + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; + } + /* capture errnos bubbled out of cluster_pageout if they occurred */ + if (error_ret != 0) { + retval = error_ret; + } + } /* end block for v2 pageout behavior */ + else { + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if (!(a_flags & UPL_NOCOMMIT)) { + ubc_upl_abort_range(upl, + a_pl_offset, + ap->a_size, + UPL_ABORT_FREE_ON_EMPTY); + } + goto pageout_done; + } + tooklock = 1; + } + end_of_range = ap->a_f_offset + ap->a_size - 1; + + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (ap->a_f_offset < filesize) { + rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } + + if (tooklock) { + hfs_unlock(cp); + } + } + /* + * just call cluster_pageout for old pre-v2 behavior + */ + retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset, + ap->a_size, filesize, a_flags); + } /* - * If data was written, and setuid or setgid bits are set and - * this process is not the superuser then clear the setuid and - * setgid bits as a precaution against tampering. + * If data was written, update the modification time of the file. 
+ * If setuid or setgid bits are set and this process is not the + * superuser then clear the setuid and setgid bits as a precaution + * against tampering. */ - if ((retval == 0) && - (cp->c_mode & (S_ISUID | S_ISGID)) && - (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_mode &= ~(S_ISUID | S_ISGID); + if (retval == 0) { + cp->c_touch_modtime = TRUE; cp->c_touch_chgtime = TRUE; - hfs_unlock(cp); + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_mode &= ~(S_ISUID | S_ISGID); + hfs_unlock(cp); + } + } + +pageout_done: + if (is_pageoutv2) { + /* release truncate lock (shared) */ + hfs_unlock_truncate(cp, 0); } return (retval); } @@ -2609,10 +3583,10 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) * Swap and validate the node if it is in native byte order. * This is always be true on big endian, so we always validate * before writing here. On little endian, the node typically has - * been swapped and validatated when it was written to the journal, + * been swapped and validated when it was written to the journal, * so we won't do anything here. */ - if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { + if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { /* Prepare the block pointer */ block.blockHeader = bp; block.buffer = (char *)buf_dataptr(bp); @@ -2622,7 +3596,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) block.blockSize = buf_count(bp); /* Endian un-swap B-Tree node */ - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false); if (retval) panic("hfs_vnop_bwrite: about to write corrupt node!\n"); } @@ -2632,7 +3606,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) if ((buf_flags(bp) & B_LOCKED)) { // XXXdbg if (VTOHFS(vp)->jnl) { - panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp); + panic("hfs: CLEARING the lock bit on bp %p\n", bp); } buf_clearflags(bp, B_LOCKED); } @@ -2656,7 +3630,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) * 0 N (file offset) * * ----------------- ----------------- - * |///////////////| | | STEP 1 (aquire new blocks) + * |///////////////| | | STEP 1 (acquire new blocks) * ----------------- ----------------- * 0 N N+1 2N * @@ -2673,7 +3647,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) * During steps 2 and 3 page-outs to file offsets less * than or equal to N are suspended. * - * During step 3 page-ins to the file get supended. + * During step 3 page-ins to the file get suspended. */ __private_extern__ int @@ -2689,7 +3663,6 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, u_int32_t growsize; u_int32_t nextallocsave; daddr64_t sector_a, sector_b; - int disabled_caching = 0; int eflags; off_t newbytes; int retval; @@ -2716,7 +3689,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (blockHint == 0) blockHint = hfsmp->nextAllocation; - if ((fp->ff_size > (u_int64_t)0x7fffffff) || + if ((fp->ff_size > 0x7fffffff) || ((fp->ff_size > blksize) && vnodetype == VLNK)) { return (EFBIG); } @@ -2735,10 +3708,16 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (!vnode_issystem(vp) && (vnodetype != VLNK)) { hfs_unlock(cp); hfs_lock_truncate(cp, TRUE); - if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(cp); + /* Force lock since callers expects lock to be held. 
*/ + if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) { + hfs_unlock_truncate(cp, TRUE); return (retval); } + /* No need to continue if file was removed. */ + if (cp->c_flag & C_NOEXISTS) { + hfs_unlock_truncate(cp, TRUE); + return (ENOENT); + } took_trunc_lock = 1; } headblks = fp->ff_blocks; @@ -2751,7 +3730,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (hfs_start_transaction(hfsmp) != 0) { if (took_trunc_lock) - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, TRUE); return (EINVAL); } started_tr = 1; @@ -2771,19 +3750,14 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, } /* - * STEP 1 - aquire new allocation blocks. + * STEP 1 - acquire new allocation blocks. */ - if (!vnode_isnocache(vp)) { - vnode_setnocache(vp); - disabled_caching = 1; - - } nextallocsave = hfsmp->nextAllocation; retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes); if (eflags & kEFMetadataMask) { HFS_MOUNT_LOCK(hfsmp, TRUE); - hfsmp->nextAllocation = nextallocsave; - hfsmp->vcbFlags |= 0xFF00; + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave); + MarkVCBDirty(hfsmp); HFS_MOUNT_UNLOCK(hfsmp, TRUE); } @@ -2806,9 +3780,20 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, retval = ENOSPC; goto restore; } else if ((eflags & kEFMetadataMask) && - ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) > + ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) > hfsmp->hfs_metazone_end)) { - printf("hfs_relocate: didn't move into metadata zone\n"); +#if 0 + const char * filestr; + char emptystr = '\0'; + + if (cp->c_desc.cd_nameptr != NULL) { + filestr = (const char *)&cp->c_desc.cd_nameptr[0]; + } else if (vnode_name(vp) != NULL) { + filestr = vnode_name(vp); + } else { + filestr = &emptystr; + } +#endif retval = ENOSPC; goto restore; } @@ -2865,7 +3850,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, goto restore; out: if (took_trunc_lock) - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, TRUE); if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -2876,7 +3861,6 @@ out: if (retval == 0) { (void) hfs_update(vp, MNT_WAIT); } - if (hfsmp->jnl) { if (cp->c_cnid < kHFSFirstUserCatalogNodeID) (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); @@ -2884,17 +3868,17 @@ out: (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); } exit: - if (disabled_caching) { - vnode_clearnocache(vp); - } if (started_tr) hfs_end_transaction(hfsmp); return (retval); restore: - if (fp->ff_blocks == headblks) + if (fp->ff_blocks == headblks) { + if (took_trunc_lock) + hfs_unlock_truncate(cp, TRUE); goto exit; + } /* * Give back any newly allocated space. 
*/ @@ -2911,7 +3895,7 @@ restore: lockflags = 0; if (took_trunc_lock) - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, TRUE); goto exit; } @@ -2921,7 +3905,7 @@ restore: * */ static int -hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p) +hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p) { struct buf *head_bp = NULL; struct buf *tail_bp = NULL; @@ -2957,16 +3941,14 @@ static int hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) { caddr_t bufp; - size_t writebase; size_t bufsize; size_t copysize; size_t iosize; - off_t filesize; size_t offset; + off_t writebase; uio_t auio; int error = 0; - filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */ writebase = blkstart * blksize; copysize = blkcnt * blksize; iosize = bufsize = MIN(copysize, 128 * 1024); @@ -2977,30 +3959,30 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } hfs_unlock(VTOC(vp)); - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); while (offset < copysize) { iosize = MIN(copysize - offset, iosize); - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufp, iosize); - error = cluster_read(vp, auio, copysize, 0); + error = cluster_read(vp, auio, copysize, IO_NOCACHE); if (error) { printf("hfs_clonefile: cluster_read failed - %d\n", error); break; } if (uio_resid(auio) != 0) { - printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio)); + printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio)); error = EIO; break; } - uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufp, iosize); - error = cluster_write(vp, auio, filesize + offset, - filesize + offset + iosize, + error = cluster_write(vp, auio, writebase + offset, + writebase + offset + iosize, uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); if (error) { printf("hfs_clonefile: cluster_write failed - %d\n", error); @@ -3015,11 +3997,25 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } uio_free(auio); - /* - * No need to call ubc_sync_range or hfs_invalbuf - * since the file was copied using IO_NOCACHE. - */ - + if ((blksize & PAGE_MASK)) { + /* + * since the copy may not have started on a PAGE + * boundary (or may not have ended on one), we + * may have pages left in the cache since NOCACHE + * will let partially written pages linger... + * lets just flush the entire range to make sure + * we don't have any pages left that are beyond + * (or intersect) the real LEOF of this file + */ + ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); + } else { + /* + * No need to call ubc_sync_range or hfs_invalbuf + * since the file was copied using IO_NOCACHE and + * the copy was done starting and ending on a page + * boundary in the file. + */ + } kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
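
hfs_clonefile() above copies the fork's data toward its newly allocated blocks in bounded 128 KB passes, reading and writing through the cluster layer with IO_NOCACHE, and then calls ubc_msync() to push out and invalidate any lingering partially written pages when the allocation block size is not page-aligned. As a rough userland analogue of that chunked same-file copy loop only (pread()/pwrite() stand in for cluster_read()/cluster_write(); the function name, buffer size constant, and error handling here are illustrative, not from the diff):

/*
 * Sketch only, not part of the diff: copy "copysize" bytes from offset 0
 * of a file to offset "writebase" of the same file in fixed-size passes,
 * by analogy with the hfs_clonefile() loop above.
 */
#define _XOPEN_SOURCE 700               /* for pread/pwrite prototypes */
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>

#define CLONE_BUFSIZE (128 * 1024)      /* mirrors the 128 KB buffer above */

static int
clone_forward(int fd, off_t writebase, off_t copysize)
{
	char *buf = malloc(CLONE_BUFSIZE);
	off_t offset = 0;
	int error = 0;

	if (buf == NULL)
		return -1;

	while (offset < copysize && error == 0) {
		ssize_t iosize = CLONE_BUFSIZE;

		if (copysize - offset < iosize)
			iosize = (ssize_t)(copysize - offset);

		if (pread(fd, buf, (size_t)iosize, offset) != iosize ||
		    pwrite(fd, buf, (size_t)iosize, writebase + offset) != iosize)
			error = -1;             /* short read or write: bail out */
		else
			offset += iosize;
	}
	free(buf);
	return error;
}

A caller would open the file, derive writebase and copysize from the block counts, and run clone_forward() before switching over to the new blocks, loosely mirroring the STEP 1/2/3 relocation sequence diagrammed earlier in the diff.
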