From 0af7c7673fb2c9da95a48e7e8092324dc441ed24 Mon Sep 17 00:00:00 2001 From: Apple Date: Tue, 20 Apr 2021 00:35:11 +0000 Subject: [PATCH] hfs-556.100.11.tar.gz --- core/hfs.h | 52 +- core/hfs_readwrite.c | 97 ++- core/hfs_vfsops.c | 6 + core/hfs_vnops.c | 20 +- core/hfs_xattr.c | 831 +++++++++++++++++++- hfs.xcodeproj/project.pbxproj | 5 +- livefiles_hfs_plugin/lf_hfs_readwrite_ops.c | 3 +- livefiles_hfs_plugin/lf_hfs_vnops.c | 34 +- tests/cases/test-cas-bsdflags.c | 72 +- tests/cases/test-symlinks.c | 60 ++ 10 files changed, 1129 insertions(+), 51 deletions(-) create mode 100644 tests/cases/test-symlinks.c diff --git a/core/hfs.h b/core/hfs.h index c450aea..9584c81 100644 --- a/core/hfs.h +++ b/core/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * Copyright (c) 2000-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -159,6 +159,48 @@ extern struct timezone gTimeZone; /* Internal Data structures*/ +#define NEW_XATTR TARGET_CPU_ARM64 + +#if NEW_XATTR + +// number of concurrent xattr IOs permitted +#define XATTR_NUM_FEXTS 64u +// bitmap size, in 64bit words, used to track concurrent xattr IOs +#define XATTR_BM_SIZE ((XATTR_NUM_FEXTS) / (NBBY * sizeof(uint64_t))) + +/* + * State for all stream based xattr IO on a volume. + * + * Fixed size thread local storage for use across the IO stack. + * + * Fields: + * - lock + * A spin lock used to access both the free file extent bitmap, `free_bm', + * and the xattr file extents themselves, `xattr_fexts'. + * - free_bm + * This bitmap represents the unused indices of `xattr_fexts'. The name is + * somewhat backwards: set bits (with a value of one) denote *used* indices. + * - xattr_fexts + * These are references to xattr file extents used as thread local storage to + * communicate from top level xattr IO functions, through the cluster layer, + * down into lower level IO functions (hfs_vnop_blockmap()). + * Each index which contains a non-NULL fext should have its corresponding + * bit within `free_bm' set. Though the reverse is not strictly true: a bit + * within the bitmap may be set before the corresponding fext is. This is + * the case when memory for a file extent reference is preallocated before + * the value of the file extent is available. + * This is an array of pointers rather than an array of file extents because: + * - it avoids both copying a full fext into and out from the array + * - it prints more nicely in lldb + */ +typedef struct { + lck_spin_t lock; + uint64_t free_bm[XATTR_BM_SIZE]; + const HFSPlusExtentDescriptor *xattr_fexts[XATTR_NUM_FEXTS]; +} xattr_io_info_t; + +#endif + /* This structure describes the HFS specific mount structure data. 
*/ typedef struct hfsmount { u_int32_t hfs_flags; /* see below */ @@ -191,6 +233,9 @@ typedef struct hfsmount { struct vnode * hfs_attribute_vp; struct vnode * hfs_startup_vp; struct vnode * hfs_attrdata_vp; /* pseudo file */ +#if NEW_XATTR + xattr_io_info_t hfs_xattr_io; /* Note, this is a big structure ~(64 * 8 bytes) */ +#endif struct cnode * hfs_extents_cp; struct cnode * hfs_catalog_cp; struct cnode * hfs_allocation_cp; @@ -1140,6 +1185,11 @@ extern int hfs_isrbtree_active (struct hfsmount *hfsmp); extern errno_t hfs_get_fsinfo(struct hfsmount *hfsmp, void *a_data); extern void hfs_fsinfo_data_add(struct hfs_fsinfo_data *fsinfo, uint64_t entry); +#if NEW_XATTR +bool hfs_xattr_fext_find(xattr_io_info_t *info, uint32_t fs_bsize, + uint64_t off, HFSPlusExtentDescriptor *out, uint64_t *off_out); +#endif + struct hfs_sysctl_chain { struct sysctl_oid *oid; struct hfs_sysctl_chain *next; diff --git a/core/hfs_readwrite.c b/core/hfs_readwrite.c index d7a070e..068fca3 100644 --- a/core/hfs_readwrite.c +++ b/core/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * Copyright (c) 2000-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -2911,22 +2911,53 @@ fail_change_next_allocation: NULL); case FSIOC_CAS_BSDFLAGS: { - if (hfsmp->hfs_flags & HFS_READ_ONLY) { - return (EROFS); - } - -#if 0 struct fsioc_cas_bsdflags *cas = (void *)ap->a_data; struct cnode *cp = VTOC(vp); u_int32_t document_id = 0; + bool need_truncate = false; int decmpfs_reset_state = 0; int error; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + /* Don't allow modification of the journal. */ if (hfs_is_journal_file(hfsmp, cp)) { return (EPERM); } + // Check if we need to set UF_COMPRESSED. + // If so, ask decmpfs if we're allowed to (and if so, if we need to truncate + // the data fork to 0). + if (!(cas->expected_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED)) { + struct vnode_attr vap; + VATTR_INIT(&vap); + VATTR_SET(&vap, va_flags, cas->new_flags); + + error = decmpfs_update_attributes(vp, &vap); + if (error) { + return (error); + } + + // Similar to hfs_vnop_setattr(), we call decmpfs_update_attributes() + // as it is the ultimate arbiter of whether or not UF_COMPRESSED can be set. + // (If the decmpfs xattr is not present or invalid, for example, + // UF_COMPRESSED should *not* be set.) + // It will also tell us if we need to truncate the data fork to 0. + if (!(vap.va_flags & UF_COMPRESSED)) { + // The request to update UF_COMPRESSED is denied. + // (Note that decmpfs_update_attributes() won't touch va_active + // in this case.) Error out. + return (EPERM); + } + + if (VATTR_IS_ACTIVE(&vap, va_data_size) && (vap.va_data_size == 0)) { + // We must also truncate this file's data fork to 0. + need_truncate = true; + } + } + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } @@ -2973,17 +3004,11 @@ fail_change_next_allocation: } } - bool setting_compression = false; - - if (!(cas->actual_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED)) - setting_compression = true; - - if (setting_compression) { + // Attempt to truncate our data fork to 0 length, if necessary. 
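+		// (need_truncate was set above if decmpfs_update_attributes()
+		// asked for the data fork to be emptied before UF_COMPRESSED can
+		// be honored; take the truncate lock first -- hfs_truncate()
+		// manages the cnode lock itself.)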
+		if (need_truncate && (VTOF(vp)->ff_size)) {
 			hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
-			if (VTOF(vp)->ff_size) {
-				// hfs_truncate will deal with the cnode lock
-				error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
-			}
+			// hfs_truncate will deal with the cnode lock
+			error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
 			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 		}
@@ -3021,9 +3046,7 @@ fail_change_next_allocation:
 			decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
 		}
 #endif
-		break;
-#endif
-		return ENOTSUP;
+		break; // return 0 below
 	}
 
 	default:
@@ -3459,8 +3482,41 @@ retry:
 		}
 	}
 
+#if NEW_XATTR
+	// check for the alternate xattr vnode
+	if (vp == hfsmp->hfs_attrdata_vp) {
+		HFSPlusExtentDescriptor real_fext;
+		size_t availableBytes;
+		u_int32_t sectorsPerBlock;	// Number of sectors per allocation block
+		u_int32_t sectorSize;
+		uint64_t f_offset;
+
+		if (!hfs_xattr_fext_find(&hfsmp->hfs_xattr_io, hfsmp->blockSize,
+		    ap->a_foffset, &real_fext, &f_offset)) {
+			// `f_offset' is only set on success, so panic with the
+			// offset we searched for
+			panic("cannot find xattr fext for %llu", ap->a_foffset);
+		}
+
+		sectorSize = hfsmp->hfs_logical_block_size;
+		// Compute the number of sectors in an allocation block
+		sectorsPerBlock = hfsmp->blockSize / sectorSize;
+
+		*ap->a_bpn = (f_offset / hfsmp->blockSize) * sectorsPerBlock;
+		availableBytes = real_fext.blockCount * hfsmp->blockSize - (f_offset - (real_fext.startBlock * hfsmp->blockSize));
+		if (availableBytes < bytesContAvail) {
+			bytesContAvail = availableBytes;
+		}
+
+		goto got_fext;
+	}
+#endif
+
 	retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
 	                       ap->a_bpn, &bytesContAvail);
+
+#if NEW_XATTR
+got_fext:
+#endif
+
 	if (syslocks) {
 		hfs_systemfile_unlock(hfsmp, lockflags);
 		syslocks = 0;
@@ -4279,7 +4335,8 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 
 	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
-		goto Err_Exit;
+		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+		return (retval);
 	}
 
 	fp = VTOF(vp);
diff --git a/core/hfs_vfsops.c b/core/hfs_vfsops.c
index 2859ac2..a7cc095 100644
--- a/core/hfs_vfsops.c
+++ b/core/hfs_vfsops.c
@@ -1384,6 +1384,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
 	lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
 	lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
+#if NEW_XATTR
+	lck_spin_init(&hfsmp->hfs_xattr_io.lock, hfs_spinlock_group, hfs_lock_attr);
+#endif
 
 	if (mp)
 		vfs_setfsprivate(mp, hfsmp);
@@ -2782,6 +2785,9 @@ hfs_locks_destroy(struct hfsmount *hfsmp)
 	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
 	lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group);
 	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
+#if NEW_XATTR
+	lck_spin_destroy(&hfsmp->hfs_xattr_io.lock, hfs_spinlock_group);
+#endif
 
 	return;
 }
diff --git a/core/hfs_vnops.c b/core/hfs_vnops.c
index 237f071..cf6d106 100644
--- a/core/hfs_vnops.c
+++ b/core/hfs_vnops.c
@@ -5538,10 +5538,6 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
 	if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord)
 		return (ENOTSUP);
 
-	/* Check for empty target name */
-	if (ap->a_target[0] == 0)
-		return (EINVAL);
-
 	hfsmp = VTOHFS(dvp);
 
 	len = strlen(ap->a_target);
@@ -5569,6 +5565,9 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
 		goto out;
 	}
 
+	if (!len)
+		goto out;
+
 #if QUOTA
 	(void)hfs_getinoquota(cp);
 #endif /* QUOTA */
@@ -6063,10 +6062,21 @@ hfs_vnop_readlink(struct vnop_readlink_args *ap)
 	fp = VTOF(vp);
 
-	/* Zero length sym links are not allowed */
-	if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) {
+	/* Symlink targets must not exceed MAXPATHLEN */
+	if (fp->ff_size > MAXPATHLEN) {
 		error = EINVAL;
 		goto exit;
 	}
+
+	/*
+	 * If this is an empty symlink, we are done. The call to uiomove() is
+	 * just for debugging and diagnostics.
+	 */
+	if (fp->ff_size == 0) {
+		static uint8_t empty_str[] = {'\0'};
+
+		error = uiomove((caddr_t)empty_str, 0, ap->a_uio);
+		goto exit;
+	}
 
 	/* Cache the path so we don't waste buffer cache resources */
 	if (fp->ff_symlinkptr == NULL) {
diff --git a/core/hfs_xattr.c b/core/hfs_xattr.c
index 311b901..4dba366 100644
--- a/core/hfs_xattr.c
+++ b/core/hfs_xattr.c
@@ -98,6 +98,91 @@ static int has_overflow_extents(HFSPlusForkData *forkdata);
 
 static int count_extent_blocks(int maxblks, HFSPlusExtentRecord extents);
 
+#if NEW_XATTR
+
+/*
+ * Iterator over set bits within an xattr free fext bitmap.
+ *
+ * See xbm_make_iter() for details on creation.
+ */
+typedef struct {
+	const uint64_t *bitmap;
+	size_t i;
+	size_t len;
+} xbm_iter_t;
+
+// xattr IO subsystem assertion
+#define xattr_assert(cond) do { \
+	if (!(cond)) \
+		panic("xattr_assert() failed: %s", #cond); \
+} while(0)
+
+#define xattr_infomsg(...) // errmsg(__VA_ARGS__)
+
+/*
+ * Purely for documentation purposes, simulate C++ const_cast<>() in the C
+ * language.
+ * This makes it quite obvious that the `const' part of a cast is being removed
+ * to work around an API that accepts a non-const pointer but otherwise has no
+ * reason to modify its value.
+ * E.g.
+ *
+ *   ancient_sum(a, count)
+ *   int *a; int count;
+ *   {
+ *       int s;
+ *       s = 0;
+ *       while (count--) {
+ *           s = s + *a++;
+ *       }
+ *       return s;
+ *   }
+ *
+ *   int modern_sum(const int *a, int count) {
+ *       return ancient_sum(const_cast(int *, a), count);
+ *   }
+ */
+#define const_cast(type, expr) ((type)(expr))
+
+static uint64_t fext2cluster(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize);
+static bool cluster_off_in_fext(uint64_t off, const HFSPlusExtentDescriptor *fext,
+		uint32_t fs_bsize);
+
+static bool fext2cluster_check(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize,
+		uint64_t *out);
+static uint32_t xattr_cluster_scale(uint32_t fs_bsize);
+
+// xattr fext thread local storage routines
+static const HFSPlusExtentDescriptor **_Nullable xattr_fext_alloc(
+		xattr_io_info_t *info) __attribute__((warn_unused_result));
+static void xattr_fext_free(xattr_io_info_t *info,
+		const HFSPlusExtentDescriptor **xfext);
+static void xattr_fext_set(xattr_io_info_t *info,
+		const HFSPlusExtentDescriptor **xfext, const HFSPlusExtentDescriptor *fext);
+static void xattr_fext_clear(xattr_io_info_t *info,
+		const HFSPlusExtentDescriptor **xfext);
+
+static size_t xattr_fext_index(const xattr_io_info_t *info,
+		const HFSPlusExtentDescriptor **xfext);
+
+// xattr fext free bitmap routines, namespace `xbm_'
+static bool xbm_find_free(const xattr_io_info_t *info, size_t *out);
+static void xbm_set_used(xattr_io_info_t *info, size_t i);
+static void xbm_clear_used(xattr_io_info_t *info, size_t i);
+
+static bool xbm_valid_index(const xattr_io_info_t *info, int64_t i);
+static size_t xbm_size(const xattr_io_info_t *info);
+
+// bitmap iterator functions
+static xbm_iter_t xbm_make_iter(const uint64_t *bitmap, size_t len);
+static bool xbm_iter_next(xbm_iter_t *ier);
+static size_t xbm_iter_peek(const xbm_iter_t *ier);
+
+// bitmap_ wrappers under namespace bm_
+static bool bm_find(const uint64_t *bm,
+		size_t from, size_t len, bool value, size_t *out);
+static bool bm_valid_index(int64_t i, size_t len);
+#endif
+
 #if NAMEDSTREAMS
 /*
  * Obtain the vnode for a stream.
@@ -1080,12 +1165,6 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi
 
 	/* If it won't fit inline then use extent-based attributes. */
 	if (attrsize > hfsmp->hfs_max_inline_attrsize) {
-#if (TARGET_OS_OSX && TARGET_CPU_ARM64)
-		printf("hfs_setxattr: non-inline attributes are not supported\n");
-		//NOTE: ENOTSUP will fool XNU into thinking we need AppleDouble files...
-		result = EPERM;
-		goto exit;
-#else
 		int blkcnt;
 		int extentblks;
 		u_int32_t *keystartblk;
@@ -1186,7 +1265,6 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi
 			extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents);
 			blkcnt -= extentblks;
 		}
-#endif //(TARGET_OS_OSX && TARGET_CPU_ARM64)
 	} else { /* Inline data */
 		if (exists) {
 			result = remove_attribute_records(hfsmp, iterator);
@@ -2379,11 +2457,675 @@ int init_attrdata_vnode(struct hfsmount *hfsmp)
 		                 &cat_fork, &vp, &newvnode_flags);
 	if (result == 0) {
 		hfsmp->hfs_attrdata_vp = vp;
+#if NEW_XATTR
+		vnode_setnoreadahead(hfsmp->hfs_attrdata_vp);
+#endif
 		hfs_unlock(VTOC(vp));
 	}
 	return (result);
 }
 
+/* The following code (inside NEW_XATTR) was ported from apfs. */
+#if NEW_XATTR
+/*
+ * This is the same as fext2cluster_check(), except that a failed translation
+ * is asserted against.
+ *
+ * This is useful for places which hold the invariant that `fext' is already
+ * representable when translated but additionally want to assert that that is
+ * the case.
+ */
+static uint64_t
+fext2cluster(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize)
+{
+	uint64_t off;
+	const bool ok = fext2cluster_check(fext, fs_bsize, &off);
+	xattr_assert(ok);
+	return off;
+}
+
+/*
+ * Translate `fext' to a cluster layer virtual offset, set via `out', that is
+ * suitable for IO to the single xattr vnode.
+ *
+ * For any particular file extent, this will map to a unique logical offset
+ * that may be used to key into the ubc. The returned value has the property
+ * such that file extents on adjacent physical blocks will always be mapped to
+ * different page sized multiples. Internally, this just multiplies by the
+ * larger of the pagesize and the blocksize (see xattr_cluster_scale() for the
+ * derivation). For further details on the importance of this in the overall
+ * scheme of xattr IO, see uio_set_fext().
+ *
+ * This returns true if `fext' is representable as a cluster layer virtual
+ * offset. It may return false for corrupted file extents:
+ * - extents with an invalid (zero) start block
+ * - extents that likely extend beyond the size of the underlying drive
+ *
+ * The second point should not happen under normal circumstances even for large
+ * drives. Large drives (by the logic in hfs_newfs()) are automatically
+ * formatted to use large block sizes: drives sized >= 16TB use 16kiB fs
+ * blocksize, at which point the virtual offset computed is equal to the device
+ * offset (even on a 16kiB pagesize system).
+ * So as long as a drive does not exceed 2^63 bytes in capacity (which is the
+ * precision of `off_t'), the internal multiplication should not overflow.
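+ *
+ * As a worked illustration (numbers borrowed from the uio_set_fext()
+ * discussion below, not additional requirements): with a 4k fs blocksize on
+ * a 16k pagesize system, xattr_cluster_scale() is 16k, so an extent starting
+ * at physical block 4 translates to a virtual offset of 4 * 16k = 64k.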
+ */
+static bool
+fext2cluster_check(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize,
+		uint64_t *out)
+{
+	const uint64_t pbn = fext->startBlock;
+
+	// xattr has an invalid start block
+	if (!pbn) {
+		return false;
+	}
+
+	// scale pbn
+	uint64_t off;
+	if (__builtin_mul_overflow(pbn, xattr_cluster_scale(fs_bsize), &off)) {
+		return false;
+	}
+
+	*out = off;
+
+	// the whole fext should be in range
+	if (__builtin_add_overflow(off, fext->blockCount * fs_bsize, &off)) {
+		return false;
+	}
+
+	// don't exceed signed 64bit precision
+	if (off > INT64_MAX) {
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Return the scale factor for translating xattr physical block numbers into
+ * a logical offset into the single xattr vnode's ubc.
+ *
+ * Translated blocks have two key requirements:
+ * - They must not overlap.
+ *   Otherwise two different blocks' contents will collide.
+ * - They must not fall within the same page.
+ *   Otherwise reading a whole page may pull in multiple xattrs' blocks, but
+ *   only decrypt with one of the xattrs' keys.
+ *
+ * A table of all possible configurations:
+ *   pagesize, fs blocksize, scale
+ *   4k,   4k,  4k
+ *   4k,   8k,  8k
+ *   ...
+ *   4k,  64k, 64k
+ *   16k,  4k, 16k
+ *   16k,  8k, 16k
+ *   16k, 16k, 16k
+ *   16k, 32k, 32k
+ *   16k, 64k, 64k
+ *
+ * This may be expressed as
+ *   scale = max(pagesize, fs blocksize)
+ */
+static uint32_t
+xattr_cluster_scale(uint32_t fs_bsize)
+{
+	return MAX(PAGE_SIZE, fs_bsize);
+}
+
+/*
+ * See off_in_range().
+ */
+static bool
+off_in_range2(uint64_t off, uint64_t start, uint64_t len)
+{
+	return (start <= off) && (off < (start + len));
+}
+
+/*
+ * Return true if `off' returned from fext2cluster() was produced from
+ * `fext'.
+ */
+static bool
+cluster_off_in_fext(uint64_t off, const HFSPlusExtentDescriptor *fext,
+		uint32_t fs_bsize)
+{
+	return off_in_range2(off, fext2cluster(fext, fs_bsize), fext->blockCount * fs_bsize);
+}
+
+/*
+ * Allocate space to save a file extent reference for xattr IO.
+ *
+ * This provides a mechanism to communicate file extents from top level, stream
+ * based xattr IO functions down into lower level IO functions
+ * (_blockmap() and _strategy()).
+ *
+ * This doesn't really return a file extent; it returns a reference into
+ * `info->xattr_fexts' which may later be pointed at a file extent.
+ * Alternatively this could just return an integral index, but then we'd
+ * need some way to signal failure.
+ *
+ * Note: the returned reference cannot be assigned directly; it must be set
+ * via xattr_fext_set() to correctly synchronize with a racing call to
+ * xattr_fext_find().
+ *
+ * This call will not block; it will return NULL if no free spaces are
+ * available. On success, follow with a call to xattr_fext_free().
+ *
+ * In terms of the implementation, this is a basic zone allocator for thread
+ * local storage in disguise. It allows up to 64 threads to each stage one
+ * file extent at a time.
+ * For further details, see the documentation for each field above the
+ * definition of `xattr_io_info_t'.
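+ *
+ * Typical use, sketched from read_attr_data()/write_attr_data() below
+ * (pseudo-code, not a verbatim excerpt):
+ *
+ *	xfext = xattr_fext_alloc(&hfsmp->hfs_xattr_io); // or fail with ENOMEM
+ *	for each extent i:
+ *		xattr_fext_set(&hfsmp->hfs_xattr_io, xfext, &extents[i]);
+ *		... cluster IO; hfs_vnop_blockmap() locates the staged fext
+ *		    via hfs_xattr_fext_find() ...
+ *		xattr_fext_clear(&hfsmp->hfs_xattr_io, xfext);
+ *	xattr_fext_free(&hfsmp->hfs_xattr_io, xfext);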
+ */
+static const HFSPlusExtentDescriptor **
+xattr_fext_alloc(xattr_io_info_t *info)
+{
+	const HFSPlusExtentDescriptor **ret;
+	size_t i;
+
+	// search for the first free bit
+	lck_spin_lock(&info->lock);
+	if (!xbm_find_free(info, &i)) {
+		// no free fexts
+		ret = NULL;
+		goto fail;
+	}
+
+	// mark that position as allocated
+	xbm_set_used(info, i);
+	ret = &info->xattr_fexts[i];
+	xattr_assert(!*ret);
+
+fail:
+	lck_spin_unlock(&info->lock);
+	return ret;
+}
+
+/*
+ * Free an xattr fext reference returned from xattr_fext_alloc().
+ * This simply amounts to clearing the bit within `info->free_bm' that
+ * corresponds to `xfext'. While not strictly necessary, we also clear out the
+ * xattr fext itself to hold the invariant that a clear bit within the free
+ * bitmap has a corresponding NULL fext reference.
+ */
+static void
+xattr_fext_free(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+	lck_spin_lock(&info->lock);
+	const size_t i = xattr_fext_index(info, xfext);
+	xbm_clear_used(info, i);
+	info->xattr_fexts[i] = NULL;
+	lck_spin_unlock(&info->lock);
+}
+
+/*
+ * Given an allocated xattr fext from xattr_fext_alloc(), assign it to reference
+ * `fext'. A copy of this fext may be returned by a subsequent call to
+ * xattr_fext_find().
+ *
+ * This may be called multiple times for the same value of `xfext'.
+ * `fext' will be borrowed until a subsequent call to xattr_fext_set() for a
+ * different file extent or xattr_fext_free() for `xfext'. It must have a
+ * lifetime that spans at least from when it's first set to when it's
+ * cleared either by xattr_fext_free() or xattr_fext_clear().
+ *
+ * Note: `fext' may be introspected by other threads via xattr_fext_find()
+ * (and in terms of getxattr(), two threads may use each other's file extents
+ * if they race to read the same xattr).
+ */
+static void
+xattr_fext_set(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext,
+		const HFSPlusExtentDescriptor *fext)
+{
+	xattr_assert(xbm_valid_index(info, xattr_fext_index(info, xfext)));
+	lck_spin_lock(&info->lock);
+	*xfext = fext;
+	lck_spin_unlock(&info->lock);
+}
+
+/*
+ * Given a cluster layer virtual offset, attempt to look up a file extent set
+ * via a previous call to xattr_fext_set().
+ *
+ * If such a fext is found, its value is copied to `out' and true is returned.
+ * Note: `out' should reference wired memory: it will be stored to while a spin
+ * lock is held; accesses must not fault.
+ *
+ * `off_out' will contain the "unvirtualized" offset.
+ */
+bool
+hfs_xattr_fext_find(xattr_io_info_t *info, uint32_t fs_bsize, uint64_t off,
+		HFSPlusExtentDescriptor *out, uint64_t *off_out)
+{
+	bool found = false;
+	lck_spin_lock(&info->lock);
+
+	// search through all in-use fexts
+	xbm_iter_t iter = xbm_make_iter(info->free_bm, xbm_size(info));
+	while(xbm_iter_next(&iter)) {
+		const HFSPlusExtentDescriptor *fext = info->xattr_fexts[xbm_iter_peek(&iter)];
+		if (!fext || !cluster_off_in_fext(off, fext, fs_bsize)) {
+			continue;
+		}
+		// `off' intersects; return `fext'
+		*out = *fext;
+		found = true;
+		break;
+	}
+
+	lck_spin_unlock(&info->lock);
+
+	if (found) {
+		*off_out = ((uint64_t)out->startBlock * fs_bsize) + off - fext2cluster(out, fs_bsize);
+	}
+
+	return found;
+}
+
+/*
+ * Given an allocated xattr fext, clear its reference to any `fext' passed to a
+ * previous call to xattr_fext_set().
+ *
+ * This will end the lifetime of such a fext and prevent xattr_fext_find() from
+ * taking a reference to it.
+ * From here, its backing memory can be deallocated.
+ *
+ * Unlike xattr_fext_free(), `xfext' will remain allocated and may be passed
+ * to xattr_fext_set() again.
+ */
+static void
+xattr_fext_clear(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+	xattr_assert(xbm_valid_index(info, xattr_fext_index(info, xfext)));
+	lck_spin_lock(&info->lock);
+	*xfext = NULL;
+	lck_spin_unlock(&info->lock);
+}
+
+/*
+ * For an xattr file extent, `xfext', returned from a previous call to
+ * xattr_fext_alloc(), return its index within `info->xattr_fexts'.
+ */
+static size_t
+xattr_fext_index(const xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+	xattr_assert((info->xattr_fexts <= xfext) &&
+			(xfext < &info->xattr_fexts[xbm_size(info)]));
+	return ((uintptr_t)xfext - (uintptr_t)info->xattr_fexts) / sizeof(*xfext);
+}
+
+static void
+bitmap_set_range(uint64_t *bm, int64_t index, int64_t count)
+{
+	int dstshift0, dstshift1;
+	uint64_t dstmask0, dstmask1;
+	int64_t bmi;
+
+	dstshift0 = index % 64;
+	dstshift1 = 64 - (index % 64);
+	dstmask0 = ~0ULL << (index % 64);
+	dstmask1 = (dstshift1 == 64) ? 0ULL : (~0ULL >> (64 - (index % 64)));
+
+	bmi = index / 64;
+	while (count >= 64) {
+		bm[bmi] = (bm[bmi] & ~dstmask0) | ((~0ULL << dstshift0) & dstmask0);
+		if (dstmask1)
+			bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((~0ULL >> dstshift1) & dstmask1);
+		bmi++;
+		count -= 64;
+	}
+	if (count) {
+		// adjust dstmask to cover just the bits remaining
+		dstmask0 = ((1ULL << count) - 1) << (index % 64);
+		dstmask1 = (dstshift1 == 64) ? 0ULL : (((1ULL << count) - 1) >> (64 - (index % 64)));
+		bm[bmi] = (bm[bmi] & ~dstmask0) | ((~0ULL << dstshift0) & dstmask0);
+		if ((count > (64 - dstshift0)) && dstmask1)
+			bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((~0ULL >> dstshift1) & dstmask1);
+	}
+}
+
+static void
+bitmap_clear_range(uint64_t *bm, int64_t index, int64_t count)
+{
+	int dstshift0, dstshift1;
+	uint64_t dstmask0, dstmask1;
+	int64_t bmi;
+
+	dstshift0 = index % 64;
+	dstshift1 = 64 - (index % 64);
+	dstmask0 = ~0ULL << (index % 64);
+	dstmask1 = (dstshift1 == 64) ? 0ULL : (~0ULL >> (64 - (index % 64)));
+
+	bmi = index / 64;
+	while (count >= 64) {
+		bm[bmi] = (bm[bmi] & ~dstmask0) | ((0ULL << dstshift0) & dstmask0);
+		if (dstmask1)
+			bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((0ULL >> dstshift1) & dstmask1);
+		bmi++;
+		count -= 64;
+	}
+	if (count) {
+		// adjust dstmask to cover just the bits remaining
+		dstmask0 = ((1ULL << count) - 1) << (index % 64);
+		dstmask1 = (dstshift1 == 64) ? 0ULL : (((1ULL << count) - 1) >> (64 - (index % 64)));
+		bm[bmi] = (bm[bmi] & ~dstmask0) | ((0ULL << dstshift0) & dstmask0);
+		if ((count > (64 - dstshift0)) && dstmask1)
+			bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((0ULL >> dstshift1) & dstmask1);
+	}
+}
+
+static int
+ctzll(uint64_t val)
+{
+	return (val == 0) ? 64 : __builtin_ctzll(val);
+}
+
+// search forwards through range and return index of first "bit" (0 or 1) encountered
+static int
+bitmap_range_find_first(int bit, const uint64_t *bm, int64_t index, int64_t count, int64_t *clear_index)
+{
+	int64_t bmi, check_count;
+	uint64_t val;
+	int pos;
+
+	bmi = index / 64;
+	while (count > 0) {
+		check_count = 64 - (index % 64);
+		if (check_count > count)
+			check_count = count;
+		val = bm[bmi] >> (index % 64);
+		if (!bit)
+			val = ~val;
+		pos = ctzll(val);
+		if (pos < check_count) {
+			*clear_index = index + pos;
+			return 1;
+		}
+		index += check_count;
+		count -= check_count;
+		bmi++;
+	}
+	return 0;
+}
+
+/*
+ * Search for the first free (clear) bit within `info's free xattr fext bitmap.
+ * Return false if no bits are clear.
+ * If some bit is found, its index is set to `*out' and true is returned.
+ */
+static bool
+xbm_find_free(const xattr_io_info_t *info, size_t *out)
+{
+	return bm_find(info->free_bm, 0, xbm_size(info), false, out);
+}
+
+/*
+ * Set the bit at index `i' within `info's underlying free xattr fext bitmap.
+ *
+ * info->lock must be held.
+ * It only makes sense to operate on one bit at a time, so this wraps
+ * bitmap_set_range().
+ */
+static void
+xbm_set_used(xattr_io_info_t *info, size_t i)
+{
+	xattr_assert(xbm_valid_index(info, i));
+	bitmap_set_range(info->free_bm, i, 1);
+}
+
+/*
+ * Clear the bit at index `i' within `info's underlying free xattr fext bitmap.
+ * This is the opposite of xbm_set_used().
+ */
+static void
+xbm_clear_used(xattr_io_info_t *info, size_t i)
+{
+	xattr_assert(xbm_valid_index(info, i));
+	bitmap_clear_range(info->free_bm, i, 1);
+}
+
+/*
+ * Return whether the given bitmap index is a valid index into `info's free
+ * bitmap.
+ */
+static bool
+xbm_valid_index(const xattr_io_info_t *info, int64_t i)
+{
+	return bm_valid_index(i, xbm_size(info));
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
+#endif
+
+/*
+ * Return the total number of *bits* in `info's free xattr fext bitmap.
+ */
+static size_t
+xbm_size(const xattr_io_info_t *info)
+{
+	// one bit per xattr fext
+	return ARRAY_SIZE(info->xattr_fexts);
+}
+
+/*
+ * Factory for an iterator over all the set (true value) bits in `bitmap'.
+ * `len' is the length in *bits* of `bitmap'.
+ *
+ * The iterator is created in an uninitialized state; a call to xbm_iter_next()
+ * is required to find the first set bit (this is different from
+ * make_fext_iterator()). As a consequence of this, an iterator may iterate
+ * zero times if no bits within `bitmap' are set. After each successful call to
+ * xbm_iter_next(), xbm_iter_peek() will return the zero-based index of each
+ * set bit.
+ *
+ * The intended use is along the lines of:
+ *	uint64_t *bm = ...; // a bitmap 123 bits (two uint64_ts) long
+ *	xbm_iter_t iter = xbm_make_iter(bm, 123);
+ *	while(xbm_iter_next(&iter)) {
+ *		size_t i = xbm_iter_peek(&iter);
+ *		printf("index %lu is set\n", i);
+ *	}
+ *
+ * In terms of the iterator internals, we hold the invariant that a valid
+ * iterator always has `i' in [0, len). A valid iterator is one for which
+ * xbm_iter_peek() will return an index in range of `bitmap'. To bootstrap the
+ * first iteration, `i' is set to SIZE_MAX; further details are in
+ * xbm_iter_next().
+ */
+static xbm_iter_t
+xbm_make_iter(const uint64_t *bitmap, size_t len)
+{
+	xattr_assert(len);
+	return (xbm_iter_t) {
+		.bitmap = bitmap,
+		.i = SIZE_MAX, // This will overflow-to-zero on the first call to xbm_iter_next()
+		.len = len
+	};
+}
+
+/*
+ * Advance `iter' to internally reference the next set bit within the bitmap.
+ * If there are no more set bits, this returns false.
+ *
+ * On success, the index of the next set bit can be retrieved by
+ * xbm_iter_peek().
+ *
+ * Internally, this searches for the first bit set at bit index >= iter->i+1.
+ * For a new iterator, the value of `i' is initialized to SIZE_MAX so `i'+1
+ * will unsigned integer overflow (which is well defined) to zero.
+ */
+static bool
+xbm_iter_next(xbm_iter_t *iter)
+{
+	size_t i;
+	// find the next set bit > `i'
+	const bool found = bm_find(iter->bitmap, iter->i + 1, iter->len, true, &i);
+
+	// if no bit is found, invalidate the iterator by setting i=len
+	iter->i = found ? i : iter->len;
+	return found;
+}
+
+/*
+ * Return the index of the set bit after a successful call to xbm_iter_next().
+ */
+static size_t
+xbm_iter_peek(const xbm_iter_t *iter)
+{
+	xattr_assert(iter->i < iter->len);
+	return iter->i;
+}
+
+/*
+ * Search for the first bit with `value' within bitmap `bm', starting at bit
+ * index `from', for at most `len' *bits*. Whether such a bit exists is
+ * returned and, if so, its bit index is written via `out'.
+ *
+ * This is just a fancy wrapper around bitmap_range_find_first().
+ */
+static bool
+bm_find(const uint64_t *bm, size_t from, size_t len, bool value, size_t *out)
+{
+	xattr_assert(bm_valid_index(from, len));
+
+	// search for `value' in [from, len)
+	int64_t i;
+	if (!bitmap_range_find_first(value,
+			const_cast(uint64_t *, bm), from, len, &i)) {
+		return false;
+	}
+
+	// some bit found; check the returned index is valid
+	xattr_assert(bm_valid_index(i, len));
+	*out = (size_t)i;
+	return true;
+}
+
+/*
+ * Return true if `i' is a valid index into a bitmap of `len' bits.
+ *
+ * The underlying bitmap_ routines operate on `int64_t' indices. This is
+ * mainly to safely convert to `size_t'.
+ */
+static bool
+bm_valid_index(int64_t i, size_t len)
+{
+	return (i >= 0) && ((uint64_t)i < len);
+}
+
+/*
+ * Virtualize `uio' offset to target xattr `fext' before a call to
+ * cluster_xattr().
+ *
+ * The computation of the IO offset is somewhat subtle. The reason for this
+ * largely has to do with how exactly the single xattr vnode
+ * (hfsmp->hfs_attrdata_vp) caches data for multiple xattrs. First,
+ * some discussion on the motivation for the single xattr vnode design. At the
+ * top level, xattr apis are quite different from normal file data apis. Some
+ * key properties are:
+ * - xattr IO apis do not support editing or random reads
+ * - xattrs may not be mmapped
+ * - any one file may have an arbitrary number of xattrs
+ * To contrast with a normal file, each file has a corresponding vnode which in
+ * turn has its own private ubc. The only way in which xattrs are actually like
+ * files is that they have the same size limits and, in terms of their
+ * implementation, they use the same on-disk structures.
+ * The result of this is that it would be too much overhead to have one vnode
+ * per xattr, so we instead maintain a disk block-type cache for xattr data.
+ * This cache is implemented as the ubc of a virtual file known as the single
+ * xattr vnode. Reads and writes are serviced by the cluster layer. The cluster
+ * layer operates in units of the vm pagesize.
+ * On a system where pagesize > fs blocksize, the naïve approach of using an
+ * identity mapping between ubc logical offset and device offset poses a
+ * challenge. Consider the following scenario:
+ * - 16k vm pagesize
+ * - 4k fs blocksize
+ *
+ * On disk, we have two xattrs that reside on adjacent blocks:
+ *     xattr A   xattr B
+ *   [aaaa|aaaa|bbbb|bbbb]
+ * pbn 4    5    6    7    8
+ *
+ * Suppose we want to just read xattr A -- pbn 4 and 5 -- the cluster layer can
+ * issue just an 8k IO, but it will store it in memory as a whole page, so we
+ * would end up with
+ *
+ *     xattr A   xattr B
+ * in memory:
+ *   [aaaa|aaaa|0000|0000]
+ *
+ * on disk:
+ *   [aaaa|aaaa|bbbb|bbbb]
+ * pbn 4    5    6    7    8
+ *
+ * A subsequent read for pbn 6 or 7, as a part of xattr B, will find the page
+ * already cached in the ubc and erroneously return zeros.
+ * Instead, we could have the cluster layer issue the full 16k IO, but then we
+ * run into encryption issues on per-file (really per-xattr) key volumes:
+ *
+ *     xattr A   xattr B
+ * in memory:
+ *   [aaaa|aaaa|O!#W|JF%R]
+ *
+ * on disk:
+ *   [asdf|ghjk|ZXCV|BNM,] encrypted
+ *   [aaaa|aaaa|bbbb|bbbb] unencrypted
+ * pbn 4    5    6    7    8
+ *
+ * In this case, the issue is that we have the crypto state available to read
+ * xattr A, but we do not have the crypto state for xattr B, so we would
+ * incorrectly decrypt pbn 6, 7 in the same IO.
+ *
+ * The solution to this is to use a scaled mapping between ubc logical offset
+ * and device offset. In this case, we use
+ *   logical offset = physical block number * pagesize
+ * and the result looks like
+ *
+ *     xattr A                   xattr B
+ * in memory:
+ *   [aaaa|aaaa|0000|0000] ... [bbbb|bbbb|0000|0000]
+ *   64k                       96k
+ *
+ * on disk:
+ *   [asdf|ghjk|ZXCV|BNM,] encrypted
+ *   [aaaa|aaaa|bbbb|bbbb] unencrypted
+ * pbn 4    5    6    7    8
+ *
+ * In memory, xattr A occupies the virtual range of [64k, 96k), but its
+ * contents are representable in the first 8k out of [64k, 80k). Note that the
+ * mapping here is not per block but rather per xattr file extent. The contents
+ * tracked by an individual file extent are logically contiguous in memory. In
+ * the example above, xattr A has one file extent spanning [0, 8k). Suppose it
+ * instead had two file extents -- [0, 4k) at pbn 4 and [4k, 8k) at pbn 5 --
+ * the above diagram would instead look like
+ * in memory:
+ *   [aaaa|0000|0000|0000][aaaa|0000|0000|0000]
+ *   64k                  80k
+ * on disk:
+ *   [aaaa|aaaa] unencrypted
+ * pbn 4    5
+ *
+ * This scaled mapping approach guarantees that xattrs are always on different
+ * pages from other xattrs, but it comes at an increased memory cost for
+ * non-page multiple sized xattrs.
+ * --
+ *
+ * If `fext' is not representable as a virtual offset (e.g. its startBlock
+ * is corrupt), this function returns false.
+ */
+static bool
+uio_set_fext(uio_t uio, const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize)
+{
+	uint64_t off;
+	if (!fext2cluster_check(fext, fs_bsize, &off)) {
+		// `fext' is out of range
+		return false;
+	}
+
+	uio_setoffset(uio, off);
+	return true;
+}
+#endif // NEW_XATTR
 
 /*
  * Read an extent based attribute.
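+ * With NEW_XATTR (a summary of read_attr_data() below), each extent's IO is
+ * staged through a per-mount xattr fext slot: the uio offset is virtualized
+ * via uio_set_fext(), the extent is published with xattr_fext_set() so that
+ * hfs_vnop_blockmap() can find it, and it is cleared again once the cluster
+ * IO completes.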
*/ @@ -2391,6 +3133,7 @@ static int read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents) { vnode_t evp = hfsmp->hfs_attrdata_vp; + off_t filesize; int bufsize; int64_t iosize; int attrsize; @@ -2403,7 +3146,16 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent bufsize = (int)uio_resid(uio); attrsize = (int)datasize; blksize = (int)hfsmp->blockSize; + filesize = VTOF(evp)->ff_size; +#if NEW_XATTR + // allocate an xattr fext for tls through the cluster layer + const HFSPlusExtentDescriptor **xattr_fext; + if (!(xattr_fext = xattr_fext_alloc(&hfsmp->hfs_xattr_io))) { + result = ENOMEM; + goto exit; + } +#endif /* * Read the attribute data one extent at a time. * For the typical case there is only one extent. @@ -2413,9 +3165,28 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent iosize = MIN(iosize, attrsize); iosize = MIN(iosize, bufsize); uio_setresid(uio, iosize); +#if NEW_XATTR + // virtualize the IO offset to target this fext + if (!uio_set_fext(uio, &extents[i], blksize)) { + // `fext' is corrupted + result = EILSEQ; + break; + } + + // stage the next xattr fext for IO + xattr_fext_set(&hfsmp->hfs_xattr_io, xattr_fext, &extents[i]); + + // Set filesize to end of data read to prevent cluster read-ahead + filesize = uio_offset(uio) + iosize; +#else uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize); +#endif + result = cluster_read(evp, uio, filesize, IO_SYNC | IO_UNIT); - result = cluster_read(evp, uio, VTOF(evp)->ff_size, IO_SYNC | IO_UNIT); +#if NEW_XATTR + // post IO, unstage this xattr fext + xattr_fext_clear(&hfsmp->hfs_xattr_io, xattr_fext); +#endif #if HFS_XATTR_VERBOSE printf("hfs: read_attr_data: cr iosize %lld [%d, %d] (%d)\n", @@ -2429,6 +3200,11 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent uio_setresid(uio, bufsize); uio_setoffset(uio, datasize); +#if NEW_XATTR + xattr_fext_free(&hfsmp->hfs_xattr_io, xattr_fext); + +exit: +#endif hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); return (result); } @@ -2436,7 +3212,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent /* * Write an extent based attribute. */ -__unused static int +static int write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents) { vnode_t evp = hfsmp->hfs_attrdata_vp; @@ -2455,6 +3231,15 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten blksize = (int) hfsmp->blockSize; filesize = VTOF(evp)->ff_size; +#if NEW_XATTR + // allocate an xattr fext for tls through the cluster layer + const HFSPlusExtentDescriptor **xattr_fext; + if (!(xattr_fext = xattr_fext_alloc(&hfsmp->hfs_xattr_io))) { + result = ENOMEM; + goto exit; + } +#endif + /* * Write the attribute data one extent at a time. 
*/ @@ -2463,10 +3248,29 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten iosize = MIN(iosize, attrsize); iosize = MIN(iosize, bufsize); uio_setresid(uio, iosize); - uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize); +#if NEW_XATTR + // virtualize the IO offset to target this fext + if (!uio_set_fext(uio, &extents[i], blksize)) { + // `fext' is corrupted + result = EILSEQ; + break; + } + + // stage the next xattr fext for IO + xattr_fext_set(&hfsmp->hfs_xattr_io, xattr_fext, &extents[i]); + filesize = uio_offset(uio) + iosize; +#else + uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize); +#endif result = cluster_write(evp, uio, filesize, filesize, filesize, (off_t) 0, IO_SYNC | IO_UNIT); + +#if NEW_XATTR + // post IO, unstage this xattr fext + xattr_fext_clear(&hfsmp->hfs_xattr_io, xattr_fext); +#endif + #if HFS_XATTR_VERBOSE printf("hfs: write_attr_data: cw iosize %lld [%d, %d] (%d)\n", iosize, extents[i].startBlock, extents[i].blockCount, result); @@ -2479,6 +3283,11 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten uio_setresid(uio, bufsize); uio_setoffset(uio, datasize); +#if NEW_XATTR + xattr_fext_free(&hfsmp->hfs_xattr_io, xattr_fext); + +exit: +#endif hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); return (result); } @@ -2486,7 +3295,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten /* * Allocate blocks for an extent based attribute. */ -__unused static int +static int alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, HFSPlusExtentDescriptor *extents, int *blocks) { int blkcnt; diff --git a/hfs.xcodeproj/project.pbxproj b/hfs.xcodeproj/project.pbxproj index f699857..1e206d7 100644 --- a/hfs.xcodeproj/project.pbxproj +++ b/hfs.xcodeproj/project.pbxproj @@ -328,6 +328,7 @@ A6873B9C234286780045680B /* lf_cs_checksum.c in Sources */ = {isa = PBXBuildFile; fileRef = A64B3BFA22E8D538009A2B10 /* lf_cs_checksum.c */; }; A6873B9D2342868D0045680B /* lf_cs_vfsops.c in Sources */ = {isa = PBXBuildFile; fileRef = A64B3C0422E8D71B009A2B10 /* lf_cs_vfsops.c */; }; A6873B9F234287200045680B /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = A6873B9E234287200045680B /* IOKit.framework */; }; + A6BE9C8C24C63369005E033C /* test-symlinks.c in Sources */ = {isa = PBXBuildFile; fileRef = A6BE9C8B24C63369005E033C /* test-symlinks.c */; }; A6E6D74020909C72002125B0 /* test-get-volume-create-time.c in Sources */ = {isa = PBXBuildFile; fileRef = A6E6D73F20909C72002125B0 /* test-get-volume-create-time.c */; }; C1B6FA0810CC0A0A00778D48 /* hfsutil_jnl.c in Sources */ = {isa = PBXBuildFile; fileRef = C1B6FA0610CC0A0A00778D48 /* hfsutil_jnl.c */; }; C1B6FA0910CC0A0A00778D48 /* hfsutil_main.c in Sources */ = {isa = PBXBuildFile; fileRef = C1B6FA0710CC0A0A00778D48 /* hfsutil_main.c */; }; @@ -360,7 +361,6 @@ D7978426205FC09A00E93B37 /* lf_hfs_endian.h in Headers */ = {isa = PBXBuildFile; fileRef = D7978424205FC09A00E93B37 /* lf_hfs_endian.h */; }; D79784412060037400E93B37 /* lf_hfs_raw_read_write.h in Headers */ = {isa = PBXBuildFile; fileRef = D797843F2060037400E93B37 /* lf_hfs_raw_read_write.h */; }; D79784422060037400E93B37 /* lf_hfs_raw_read_write.c in Sources */ = {isa = PBXBuildFile; fileRef = D79784402060037400E93B37 /* lf_hfs_raw_read_write.c */; }; - D7B2DC81233A3F5B00F12230 /* livefiles_hfs.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 900BDED41FF919C2002F7EC0 /* livefiles_hfs.dylib */; }; 
D7BD8F9C20AC388E00E93640 /* lf_hfs_catalog.c in Sources */ = {isa = PBXBuildFile; fileRef = 906EBF82206409B800B21E94 /* lf_hfs_catalog.c */; }; EE73740520644328004C2F0E /* lf_hfs_sbunicode.h in Headers */ = {isa = PBXBuildFile; fileRef = EE73740320644328004C2F0E /* lf_hfs_sbunicode.h */; }; EE73740620644328004C2F0E /* lf_hfs_sbunicode.c in Sources */ = {isa = PBXBuildFile; fileRef = EE73740420644328004C2F0E /* lf_hfs_sbunicode.c */; }; @@ -1096,6 +1096,7 @@ A64B3C1622EA2C4E009A2B10 /* CoreFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreFoundation.framework; path = Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.Internal.sdk/System/Library/Frameworks/CoreFoundation.framework; sourceTree = DEVELOPER_DIR; }; A64B3C1822EA2C5E009A2B10 /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.Internal.sdk/System/Library/Frameworks/IOKit.framework; sourceTree = DEVELOPER_DIR; }; A6873B9E234287200045680B /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.Internal.sdk/System/Library/Frameworks/IOKit.framework; sourceTree = DEVELOPER_DIR; }; + A6BE9C8B24C63369005E033C /* test-symlinks.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = "test-symlinks.c"; sourceTree = ""; }; A6E6D73F20909C72002125B0 /* test-get-volume-create-time.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = "test-get-volume-create-time.c"; sourceTree = ""; }; C1B6FA0610CC0A0A00778D48 /* hfsutil_jnl.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = hfsutil_jnl.c; sourceTree = ""; }; C1B6FA0710CC0A0A00778D48 /* hfsutil_main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = hfsutil_main.c; sourceTree = ""; }; @@ -1862,6 +1863,7 @@ 09D6B7D61E317ED2003C20DC /* test_disklevel.c */, A6E6D73F20909C72002125B0 /* test-get-volume-create-time.c */, A64B3C1322E91AF6009A2B10 /* test-lf-cs-plugin.c */, + A6BE9C8B24C63369005E033C /* test-symlinks.c */, ); path = cases; sourceTree = ""; @@ -3101,6 +3103,7 @@ FB76B3EE1B7BE24B00FA9F2B /* disk-image.m in Sources */, FB76B3F21B7BE79800FA9F2B /* systemx.c in Sources */, F90E174921ADFFD100345EE3 /* test-cas-bsdflags.c in Sources */, + A6BE9C8C24C63369005E033C /* test-symlinks.c in Sources */, FB285C2A1B7E81180099B2ED /* test-sparse-dev.c in Sources */, FB55AE541B7C271000701D03 /* test-doc-tombstone.c in Sources */, FBD69AFA1B9132E40022ECAD /* test-dateadded.c in Sources */, diff --git a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c index 45d1eff..6ce7c7e 100644 --- a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c +++ b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c @@ -785,7 +785,8 @@ hfs_vnop_preallocate(struct vnode * vp, LIFilePreallocateArgs_t* psPreAllocReq, hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - goto err_exit; + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return (retval); } off_t filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; diff --git a/livefiles_hfs_plugin/lf_hfs_vnops.c b/livefiles_hfs_plugin/lf_hfs_vnops.c index a30b815..d5d795f 100644 --- a/livefiles_hfs_plugin/lf_hfs_vnops.c +++ b/livefiles_hfs_plugin/lf_hfs_vnops.c @@ -37,6 
+37,8 @@
 
 #define HFSRM_SKIP_RESERVE	0x01
 
 #define _PATH_RSRCFORKSPEC	"/..namedfork/rsrc"
 
+static int hfs_set_bsd_flags(struct cnode *cp, u_int32_t new_bsd_flags);
+
 void
 replace_desc(struct cnode *cp, struct cat_desc *cdp)
 {
@@ -1482,6 +1484,34 @@ out:
 	return (error);
 }
 
+static int
+hfs_set_bsd_flags(struct cnode *cp, u_int32_t new_bsd_flags)
+{
+	u_int16_t *fdFlags;
+
+	// Currently we don't support UF_TRACKED in detonator
+	if (new_bsd_flags & UF_TRACKED)
+		new_bsd_flags &= ~UF_TRACKED;
+
+	cp->c_bsdflags = new_bsd_flags;
+	cp->c_flag |= C_MODIFIED;
+	cp->c_touch_chgtime = TRUE;
+
+	/*
+	 * Mirror the UF_HIDDEN flag to the invisible bit of the Finder Info.
+	 *
+	 * The fdFlags for files and frFlags for folders are both 8 bytes
+	 * into the userInfo (the first 16 bytes of the Finder Info). They
+	 * are both 16-bit fields.
+	 */
+	fdFlags = (u_int16_t *) &cp->c_finderinfo[8];
+	if (new_bsd_flags & UF_HIDDEN)
+		*fdFlags |= OSSwapHostToBigConstInt16(kFinderInvisibleMask);
+	else
+		*fdFlags &= ~OSSwapHostToBigConstInt16(kFinderInvisibleMask);
+
+	return 0;
+}
+
 int hfs_vnop_setattr( vnode_t vp, const UVFSFileAttributes *attr )
 {
 	int err = 0;
@@ -1564,7 +1594,7 @@ int hfs_vnop_setattr( vnode_t vp, const UVFSFileAttributes *attr )
 
 	if ( attr->fa_validmask & UVFS_FA_VALID_BSD_FLAGS )
 	{
-		cp->c_bsdflags = attr->fa_bsd_flags;
+		hfs_set_bsd_flags(cp, attr->fa_bsd_flags);
 	}
 
 	/*
@@ -1621,7 +1651,7 @@ hfs_update(struct vnode *vp, int options)
 	struct cat_fork datafork;
 	struct cat_fork rsrcfork;
 	struct hfsmount *hfsmp;
-	int lockflags; 
+	int lockflags;
 	int error = 0;
 
 	if (ISSET(cp->c_flag, C_NOEXISTS))
diff --git a/tests/cases/test-cas-bsdflags.c b/tests/cases/test-cas-bsdflags.c
index d9d0e93..cee5432 100644
--- a/tests/cases/test-cas-bsdflags.c
+++ b/tests/cases/test-cas-bsdflags.c
@@ -1,4 +1,5 @@
 #include
+#include <errno.h>
 #include
 #include
 #include
@@ -16,11 +17,14 @@
 #include "hfs-tests.h"
 #include "test-utils.h"
 #include "disk-image.h"
+#include "systemx.h"
 
-//TEST(cas_bsdflags)
+#define AFSCUTIL	"/usr/local/bin/afscutil"
+
+TEST(cas_bsdflags)
 
 static bool
-cas_bsd_flags(int fd, uint32_t expected_flags, uint32_t new_flags)
+cas_bsd_flags(int fd, uint32_t expected_flags, uint32_t new_flags, int expected_error)
 {
 	struct fsioc_cas_bsdflags cas;
 
@@ -28,10 +32,34 @@
 	cas.new_flags = new_flags;
 	cas.actual_flags = ~0;		/* poison */
 
-	assert_no_err(ffsctl(fd, FSIOC_CAS_BSDFLAGS, &cas, 0));
+	if (expected_error != 0) {
+		// no assert_call_fail() in test_hfs
+		assert(ffsctl(fd, FSIOC_CAS_BSDFLAGS, &cas, 0) == -1);
+		assert(errno == expected_error);
+		return true; // as expected - flags were not changed
+	} else {
+		assert_no_err(ffsctl(fd, FSIOC_CAS_BSDFLAGS, &cas, 0));
+	}
+
 	return (cas.expected_flags == cas.actual_flags);
 }
 
+static void
+write_compressible_data(int fd)
+{
+	// adapted from test_clonefile in apfs
+	char dbuf[4096];
+
+	// write some easily compressible data
+	memset(dbuf + 0*(sizeof(dbuf)/4), 'A', sizeof(dbuf)/4);
+	memset(dbuf + 1*(sizeof(dbuf)/4), 'B', sizeof(dbuf)/4);
+	memset(dbuf + 2*(sizeof(dbuf)/4), 'C', sizeof(dbuf)/4);
+	memset(dbuf + 3*(sizeof(dbuf)/4), 'D', sizeof(dbuf)/4);
+
+	for (int idx = 0; idx < 32; idx++) {
+		check_io(write(fd, dbuf, sizeof(dbuf)), sizeof(dbuf));
+	}
+}
+
 int run_cas_bsdflags(__unused test_ctx_t *ctx)
 {
 	disk_image_t *di = disk_image_get();
@@ -46,19 +74,43 @@ int run_cas_bsdflags(__unused test_ctx_t *ctx)
 
 	assert_no_err(fchflags(fd, UF_HIDDEN));
 	assert_no_err(fstat(fd, &sb));
-	assert(sb.st_flags == UF_HIDDEN);
+	assert_equal_int(sb.st_flags, UF_HIDDEN);
+
+	assert(cas_bsd_flags(fd, 0, UF_NODUMP, 0) == false);
+	assert_no_err(fstat(fd, &sb));
+	assert_equal_int(sb.st_flags, UF_HIDDEN);
+
+	assert(cas_bsd_flags(fd, UF_HIDDEN, UF_NODUMP, 0) == true);
+	assert_no_err(fstat(fd, &sb));
+	assert_equal_int(sb.st_flags, UF_NODUMP);
+
+	assert(cas_bsd_flags(fd, UF_NODUMP, 0, 0) == true);
+	assert_no_err(fstat(fd, &sb));
+	assert_equal_int(sb.st_flags, 0);
+
+	// Add some data to our (non-compressed) file,
+	// mark it with UF_COMPRESSED,
+	// and check that UF_COMPRESSED is *not* set -
+	// as there is no decmpfs xattr present.
+	check_io(write(fd, "J", 1), 1);
+	assert_no_err(fstat(fd, &sb));
+	assert(sb.st_size > 0);
 
-	assert(cas_bsd_flags(fd, 0, UF_NODUMP) == false);
+	assert(cas_bsd_flags(fd, 0, UF_COMPRESSED, EPERM) == true);
 	assert_no_err(fstat(fd, &sb));
-	assert(sb.st_flags == UF_HIDDEN);
+	assert_equal_int(sb.st_flags, 0);
 
-	assert(cas_bsd_flags(fd, UF_HIDDEN, UF_NODUMP) == true);
+	// Now, add some compressible data to the file and compress it using afscutil.
+	write_compressible_data(fd);
+	assert(!systemx(AFSCUTIL, "-c", file, NULL));
 	assert_no_err(fstat(fd, &sb));
-	assert(sb.st_flags == UF_NODUMP);
+	assert_equal_int(sb.st_flags, UF_COMPRESSED);
 
-	assert(cas_bsd_flags(fd, UF_NODUMP, 0) == true);
+	// Now, remove UF_COMPRESSED from our file and
+	// check that the file is 0-length.
+	assert(cas_bsd_flags(fd, UF_COMPRESSED, 0, 0) == true);
 	assert_no_err(fstat(fd, &sb));
-	assert(sb.st_flags == 0);
+	assert_equal_ll(sb.st_size, 0);
 
 	close(fd);
 	assert_no_err(unlink(file));
diff --git a/tests/cases/test-symlinks.c b/tests/cases/test-symlinks.c
new file mode 100644
index 0000000..8fa0da1
--- /dev/null
+++ b/tests/cases/test-symlinks.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * UNIX Conformance | hfs_vnop_symlink should not validate empty path.
+ */
+#include <sys/stat.h>
+#include <unistd.h>
+
+
+#include "hfs-tests.h"
+#include "test-utils.h"
+#include "disk-image.h"
+
+#define SYMLINK_TEST_DIR	"symlink.testdir"
+#define SYMLINK_EMPTYSTR	"symlink.emptystr"
+
+TEST(symlinks)
+
+int run_symlinks(__unused test_ctx_t *ctx)
+{
+	disk_image_t *di;
+	struct stat statb;
+	char *parent_dir, *slink;
+	char buf;
+
+	di = disk_image_get();
+
+	//
+	// Create a parent directory to host our test.
+	//
+	asprintf(&parent_dir, "%s/"SYMLINK_TEST_DIR, di->mount_point);
+	assert(!mkdir(parent_dir, 0777) || errno == EEXIST);
+
+	//
+	// Now check to make sure we support creating a symlink with an empty
+	// target, as required for UNIX Conformance.
+	//
+	asprintf(&slink, "%s/"SYMLINK_EMPTYSTR, parent_dir);
+	assert_no_err(symlink("", slink));
+
+	//
+	// Test that lstat() reports the node as a symlink (S_ISLNK).
+	//
+	memset(&statb, 0, sizeof(statb));
+	assert(!(lstat(slink, &statb) < 0));
+	assert(S_ISLNK(statb.st_mode));
+
+	//
+	// Test that readlink() returns zero.
+	//
+	assert(!readlink(slink, &buf, 1));
+
+	//
+	// Delete the test symlink and directory, and release all resources.
+	//
+	unlink(slink);
+	rmdir(parent_dir);
+	free(slink);
+	free(parent_dir);
+	return 0;
+}
-- 
2.45.2
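
For reference, the FSIOC_CAS_BSDFLAGS compare-and-swap exercised by test-cas-bsdflags.c above can be driven from userspace roughly as follows. This is a minimal sketch, not part of the patch: it assumes <sys/fsctl.h> declares struct fsioc_cas_bsdflags and ffsctl() as the test uses them, and set_flag_cas() is a hypothetical helper name.

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/fsctl.h>
#include <sys/stat.h>
#include <unistd.h>

// Atomically OR `flags' into the BSD flags of `path', retrying if another
// thread wins the race (the kernel reports the current flags via actual_flags).
static int
set_flag_cas(const char *path, uint32_t flags)
{
	struct fsioc_cas_bsdflags cas;
	struct stat sb;
	int fd = open(path, O_RDWR);

	if (fd < 0 || fstat(fd, &sb) < 0)
		goto fail;

	cas.expected_flags = sb.st_flags;
	for (;;) {
		cas.new_flags = cas.expected_flags | flags;
		cas.actual_flags = ~0; /* poison, as in the test */

		// EPERM here includes the new policy above: requesting
		// UF_COMPRESSED without a valid decmpfs xattr is refused.
		if (ffsctl(fd, FSIOC_CAS_BSDFLAGS, &cas, 0) < 0)
			goto fail;

		if (cas.actual_flags == cas.expected_flags)
			break;                              // swap took effect
		cas.expected_flags = cas.actual_flags;  // lost the race; retry
	}

	close(fd);
	return 0;

fail:
	if (fd >= 0)
		close(fd);
	return -1;
}

As the test demonstrates, a successful transition into UF_COMPRESSED may also truncate the data fork to zero once decmpfs approves the request.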