static int count_extent_blocks(int maxblks, HFSPlusExtentRecord extents);
+#if NEW_XATTR
+
+/*
+ * Iterator over set bits within an xattr free fext bitmap.
+ *
+ * See xbm_make_iter() for details on creation.
+ */
+typedef struct {
+ const uint64_t *bitmap;
+ size_t i;
+ size_t len;
+} xbm_iter_t;
+
+// xattr IO subsystem assertion
+#define xattr_assert(cond) do { \
+	if (!(cond)) \
+		panic("xattr_assert() failed: %s", #cond); \
+} while(0)
+
+#define xattr_infomsg(...) // errmsg(__VA_ARGS__)
+
+/*
+ * Purely for documentation purposes, simulate C++ const_cast<>() in the C
+ * language.
+ * This makes it quite obvious that the `const' part of a cast is being removed
+ * to workaround an api that accepts a non-const pointer but otherwise has no
+ * reason to modify its value.
+ * E.g.
+ *
+ * ancient_sum(a, count)
+ * int *a; int count;
+ * {
+ * int s;
+ * s = 0;
+ * while (count--) {
+ *     s = s + *a++;
+ * }
+ * return s;
+ * }
+ *
+ * int modern_sum(const int *a, int count) {
+ * return ancient_sum(const_cast(int *, a), count);
+ * }
+ */
+#define const_cast(type, expr) ((type)(expr))
+
+static uint64_t fext2cluster(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize);
+static bool cluster_off_in_fext(uint64_t off, const HFSPlusExtentDescriptor *fext,
+ uint32_t fs_bsize);
+
+static bool fext2cluster_check(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize,
+ uint64_t *out);
+static uint32_t xattr_cluster_scale(uint32_t fs_bsize);
+
+// xattr fext thread local storage routines
+static const HFSPlusExtentDescriptor **_Nullable xattr_fext_alloc(
+ xattr_io_info_t *info) __attribute__((warn_unused_result));
+static void xattr_fext_free(xattr_io_info_t *info,
+ const HFSPlusExtentDescriptor **xfext);
+static void xattr_fext_set(xattr_io_info_t *info,
+ const HFSPlusExtentDescriptor **xfext, const HFSPlusExtentDescriptor *fext);
+static void xattr_fext_clear(xattr_io_info_t *info,
+ const HFSPlusExtentDescriptor **xfext);
+
+static size_t xattr_fext_index(const xattr_io_info_t *info,
+ const HFSPlusExtentDescriptor **xfext);
+
+// xattr fext free bitmap routines, namespace `xbm_'
+static bool xbm_find_free(const xattr_io_info_t *info, size_t *out);
+static void xbm_set_used(xattr_io_info_t *info, size_t i);
+static void xbm_clear_used(xattr_io_info_t *info, size_t i);
+
+static bool xbm_valid_index(const xattr_io_info_t *info, int64_t i);
+static size_t xbm_size(const xattr_io_info_t *info);
+
+// bitmap iterator functions
+static xbm_iter_t xbm_make_iter(const uint64_t *bitmap, size_t len);
+static bool xbm_iter_next(xbm_iter_t *ier);
+static size_t xbm_iter_peek(const xbm_iter_t *ier);
+
+// bitmap_ wrappers under namespace bm_
+static bool bm_find(const uint64_t *bm,
+ size_t from, size_t len, bool value, size_t *out);
+static bool bm_valid_index(int64_t i, size_t len);
+#endif
+
#if NAMEDSTREAMS
/*
* Obtain the vnode for a stream.
/* If it won't fit inline then use extent-based attributes. */
if (attrsize > hfsmp->hfs_max_inline_attrsize) {
-#if (TARGET_OS_OSX && TARGET_CPU_ARM64)
- printf("hfs_setxattr: non-inline attributes are not supported\n");
- //NOTE: ENOTSUP will fool XNU into thinking we need AppleDouble files...
- result = EPERM;
- goto exit;
-#else
int blkcnt;
int extentblks;
u_int32_t *keystartblk;
extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents);
blkcnt -= extentblks;
}
-#endif //(TARGET_OS_OSX && TARGET_CPU_ARM64)
} else { /* Inline data */
if (exists) {
result = remove_attribute_records(hfsmp, iterator);
&cat_fork, &vp, &newvnode_flags);
if (result == 0) {
hfsmp->hfs_attrdata_vp = vp;
+#if NEW_XATTR
+ vnode_setnoreadahead(hfsmp->hfs_attrdata_vp);
+#endif
hfs_unlock(VTOC(vp));
}
return (result);
}
+/* The following code (inside NEW_XATTR) was ported from apfs. */
+#if NEW_XATTR
+/*
+ * This is the same as fext2cluster_check(), but overflow is asserted against.
+ *
+ * This is useful for places which hold the invariant that `fext' is already
+ * representable when translated but additionally want to assert that that is
+ * the case.
+ */
+static uint64_t
+fext2cluster(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize)
+{
+ uint64_t off;
+ const bool ok = fext2cluster_check(fext, fs_bsize, &off);
+ xattr_assert(ok);
+ return off;
+}
+
+/*
+ * Translate `fext' to a cluster layer virtual offset, set via `out', that is
+ * suitable for IO to the single xattr vnode.
+ *
+ * For any particular file extent, this will map to a unique logical offset
+ * that may be used to key into the ubc. The returned value has the property
+ * that file extents on adjacent physical blocks will always be mapped to
+ * different page-sized multiples. Internally, this just multiplies by the
+ * larger of the pagesize and the blocksize (see xattr_cluster_scale() for the
+ * derivation). For further details on the importance of this in the overall
+ * scheme of xattr IO, see uio_set_fext().
+ *
+ * This returns true if `fext' is representable as a cluster layer virtual
+ * offset. It may return false for corrupted file extents:
+ * - extents with a zero (invalid) start block
+ * - extents that likely extend beyond the size of the underlying drive
+ *
+ * The second point should not happen under normal circumstances even for large
+ * drives. Large drives (by the logic in hfs_newfs()) are automatically
+ * formatted to use large block sizes: drives sized >= 16TB use 16kiB fs
+ * blocksize, at which point the virtual offset computed is equal to the device
+ * offset (even on a 16kiB pagesize system).
+ * So as long as a drive does not exceed 2^63 bytes in capacity (which is the
+ * precision of `off_t'), the internal multiplication should not overflow.
+ */
+static bool
+fext2cluster_check(const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize,
+ uint64_t *out)
+{
+ const uint64_t pbn = fext->startBlock;
+
+	// the xattr has an invalid (zero) start block
+ if (!pbn) {
+ return false;
+ }
+
+ // scale pbn
+ uint64_t off;
+ if (__builtin_mul_overflow(pbn, xattr_cluster_scale(fs_bsize), &off)) {
+ return false;
+ }
+
+ *out = off;
+
+ // the whole fext should be in range
+	if (__builtin_add_overflow(off, (uint64_t)fext->blockCount * fs_bsize, &off)) {
+ return false;
+ }
+
+ // don't exceed signed 64bit precision
+ if (off > INT64_MAX) {
+ return false;
+ }
+
+ return true;
+}
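+
+/*
+ * A worked example of the translation above (illustrative only; it assumes a
+ * 16k vm pagesize and a 4k fs blocksize, the same configuration used in the
+ * uio_set_fext() discussion below):
+ *
+ *	scale          = max(16k, 4k) = 16k
+ *	fext           = { .startBlock = 4, .blockCount = 2 }
+ *	cluster offset = 4 * 16k = 64k
+ *	range check    = 64k + 2 * 4k = 72k, which fits, so return true
+ *
+ * i.e. the 8k of xattr data at physical blocks 4-5 is cached at ubc offsets
+ * [64k, 72k) of the single xattr vnode.
+ */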
+
+/*
+ * Return the scale factor for translating xattr physical block numbers into
+ * a logical offset into the single xattr vnode's ubc.
+ *
+ * Translated blocks have two key requirements:
+ * - They must not overlap.
+ * Otherwise two different blocks' contents will collide.
+ * - They must not fall within the same page.
+ *   Otherwise reading a whole page may pull in multiple xattrs' blocks, but
+ *   only decrypt them with one xattr's key.
+ *
+ * A table of all possible configurations:
+ * pagesize, fs blocksize, scale
+ * 4k, 4k, 4k
+ * 4k, 8k, 8k
+ * ...
+ * 4k, 64k, 64k
+ * 16k, 4k, 16k
+ * 16k, 8k, 16k
+ * 16k, 16k, 16k
+ * 16k, 32k, 32k
+ * 16k, 64k, 64k
+ *
+ * This may be expressed as
+ * scale = max(pagesize, fs blocksize)
+ */
+static uint32_t
+xattr_cluster_scale(uint32_t fs_bsize)
+{
+ return MAX(PAGE_SIZE, fs_bsize);
+}
+
+/*
+ * See off_in_range().
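+ * Returns whether `off' lies in the half-open range [start, start + len).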
+ */
+static bool
+off_in_range2(uint64_t off, uint64_t start, uint64_t len)
+{
+ return (start <= off) && (off < (start + len));
+}
+
+/*
+ * Return true if `off' returned from fext2cluster() was produced from
+ * `fext'.
+ */
+static bool
+cluster_off_in_fext(uint64_t off, const HFSPlusExtentDescriptor *fext,
+ uint32_t fs_bsize)
+{
+ return off_in_range2(off, fext2cluster(fext, fs_bsize), fext->blockCount * fs_bsize);
+}
+
+/*
+ * Allocate space to save a file extent reference for xattr IO.
+ *
+ * This provides a mechanism to communicate file extents from top level, stream
+ * based xattr IO functions down into lower level IO functions
+ * (_blockmap() and _strategy()).
+ *
+ * This doesn't really return a file extent; it returns a reference into
+ * `info->xattr_fexts' which may later be pointed at a file extent.
+ * Alternatively this could just return an integral index, but then we'd
+ * need some way to signal failure.
+ *
+ * Note: the returned reference cannot be assigned directly; it must be set
+ * via xattr_fext_set() to correctly synchronize with a racing call to
+ * xattr_fext_find().
+ *
+ * This call will not block; it will return NULL if no free spaces are
+ * available. On success, the caller must eventually release the returned
+ * reference with a call to xattr_fext_free().
+ *
+ * In terms of the implementation, this is a basic zone allocator for thread
+ * local storage in disguise. It allows up to 64 threads to each stage one
+ * file extent at a time.
+ * For further details, see the documentation for each field above the
+ * definition of `xattr_io_info_t'.
+ */
+static const HFSPlusExtentDescriptor **
+xattr_fext_alloc(xattr_io_info_t *info)
+{
+ const HFSPlusExtentDescriptor **ret;
+ size_t i;
+
+ // search for the first free bit
+ lck_spin_lock(&info->lock);
+ if (!xbm_find_free(info, &i)) {
+ // no free fexts
+ ret = NULL;
+ goto fail;
+ }
+
+ // mark that position as allocated
+ xbm_set_used(info, i);
+ ret = &info->xattr_fexts[i];
+ xattr_assert(!*ret);
+
+fail:
+ lck_spin_unlock(&info->lock);
+ return ret;
+}
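+
+/*
+ * Typical lifecycle of an xattr fext slot, sketched from its use in
+ * read_attr_data()/write_attr_data() below (not an additional API):
+ *
+ *	const HFSPlusExtentDescriptor **xfext;
+ *	if (!(xfext = xattr_fext_alloc(&hfsmp->hfs_xattr_io)))
+ *		return ENOMEM;
+ *	// for each extent making up the xattr:
+ *	xattr_fext_set(&hfsmp->hfs_xattr_io, xfext, &extents[i]);
+ *	// ... cluster IO against hfsmp->hfs_attrdata_vp ...
+ *	xattr_fext_clear(&hfsmp->hfs_xattr_io, xfext);
+ *	// once all extents have been processed:
+ *	xattr_fext_free(&hfsmp->hfs_xattr_io, xfext);
+ */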
+
+/*
+ * Free an xattr fext reference returned from xattr_fext_alloc().
+ *
+ * This simply amounts to clearing the bit within `info->free_bm' that
+ * corresponds to `xfext'. While not strictly necessary, we also clear out the
+ * xattr fext itself to hold the invariant that a clear bit within the free
+ * bitmap has a corresponding NULL fext reference.
+ */
+static void
+xattr_fext_free(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+ lck_spin_lock(&info->lock);
+ const size_t i = xattr_fext_index(info, xfext);
+ xbm_clear_used(info, i);
+ info->xattr_fexts[i] = NULL;
+ lck_spin_unlock(&info->lock);
+}
+
+/*
+ * Given an allocated xattr fext from xattr_fext_alloc() assign it to reference
+ * `fext'. A copy of this fext may be returned by a subsequent call to
+ * xattr_fext_find().
+ *
+ * This may be called multiple times for the same value of `xfext'.
+ * `fext' will be borrowed until a subsequent call to xattr_fext_set() for a
+ * different file extent or xattr_fext_free() for `xfext'. Its lifetime must
+ * span at least from when it is first set until it is cleared, either by
+ * xattr_fext_free() or xattr_fext_clear().
+ *
+ * Note: `fext' may be introspected by other threads via xattr_fext_find()
+ * (and in terms of getxattr(), two threads may use each other's file extents
+ * if they race to read the same xattr).
+ */
+static void
+xattr_fext_set(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext,
+ const HFSPlusExtentDescriptor *fext)
+{
+ xattr_assert(xbm_valid_index(info, xattr_fext_index(info, xfext)));
+ lck_spin_lock(&info->lock);
+ *xfext = fext;
+ lck_spin_unlock(&info->lock);
+}
+
+/*
+ * Given a cluster layer virtual offset, attempt to look up a file extent set
+ * via a previous call to xattr_fext_set().
+ *
+ * If such a fext is found, its value is copied to `out' and true is returned.
+ * Note: `out' should reference wired memory: it will be stored to while a spin
+ * lock is held; accesses must not fault.
+ *
+ * On success, `*off_out' will contain the "unvirtualized" (device-relative)
+ * offset corresponding to `off'.
+ */
+bool
+hfs_xattr_fext_find(xattr_io_info_t *info, uint32_t fs_bsize, uint64_t off,
+ HFSPlusExtentDescriptor *out, uint64_t *off_out)
+{
+ bool found = false;
+ lck_spin_lock(&info->lock);
+
+ // search through all in-use fexts
+ xbm_iter_t iter = xbm_make_iter(info->free_bm, xbm_size(info));
+ while(xbm_iter_next(&iter)) {
+ const HFSPlusExtentDescriptor *fext = info->xattr_fexts[xbm_iter_peek(&iter)];
+ if (!fext || !cluster_off_in_fext(off, fext, fs_bsize)) {
+ continue;
+ }
+ // `off' intersects; return `fext'
+ *out = *fext;
+ found = true;
+ break;
+ }
+
+ lck_spin_unlock(&info->lock);
+
+ if (found) {
+ *off_out = ((uint64_t)out->startBlock * fs_bsize) + off - fext2cluster(out, fs_bsize);
+ }
+
+ return found;
+}
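+
+/*
+ * Unvirtualization example (same assumptions as the fext2cluster_check()
+ * example: 16k pagesize, 4k fs blocksize, fext = {4, 2}, so the fext maps to
+ * cluster offsets [64k, 72k)):
+ *
+ *	off      = 68k, i.e. the second block of the fext
+ *	*off_out = 4 * 4k + (68k - 64k) = 20k
+ *
+ * which corresponds to physical block 5 (pbn 5 * 4k).
+ */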
+
+/*
+ * Given an allocated xattr fext, clear its reference to any `fext' passed to a
+ * previous call to xattr_fext_set().
+ *
+ * This will end the lifetime of such a fext and prevent xattr_fext_find() from
+ * taking a reference to it. From here, its backing memory can be deallocated.
+ *
+ * Unlike xattr_fext_free(), `xfext' will remain allocated and may be passed to
+ * xattr_fext_set() again.
+ */
+static void
+xattr_fext_clear(xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+ xattr_assert(xbm_valid_index(info, xattr_fext_index(info, xfext)));
+ lck_spin_lock(&info->lock);
+ *xfext = NULL;
+ lck_spin_unlock(&info->lock);
+}
+
+/*
+ * For an xattr file extent, `xfext', returned from a previous call to
+ * xattr_fext_alloc(), return its index within `info->xattr_fexts'.
+ */
+static size_t
+xattr_fext_index(const xattr_io_info_t *info, const HFSPlusExtentDescriptor **xfext)
+{
+ xattr_assert((info->xattr_fexts <= xfext) &&
+ (xfext < &info->xattr_fexts[xbm_size(info)]));
+ return ((uintptr_t)xfext - (uintptr_t)info->xattr_fexts) / sizeof(*xfext);
+}
+
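+// Set `count' bits within bitmap `bm', starting at bit index `index'.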
+static void
+bitmap_set_range(uint64_t *bm, int64_t index, int64_t count)
+{
+ int dstshift0, dstshift1;
+ uint64_t dstmask0, dstmask1;
+ int64_t bmi;
+
+ dstshift0 = index % 64;
+ dstshift1 = 64 - (index % 64);
+ dstmask0 = ~0ULL << (index % 64);
+ dstmask1 = (dstshift1 == 64) ? 0ULL : (~0ULL >> (64 - (index % 64)));
+
+ bmi = index / 64;
+ while (count >= 64) {
+ bm[bmi] = (bm[bmi] & ~dstmask0) | ((~0ULL << dstshift0) & dstmask0);
+ if (dstmask1)
+ bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((~0ULL >> dstshift1) & dstmask1);
+ bmi++;
+ count -= 64;
+ }
+ if (count) {
+ // adjust dstmask to cover just the bits remaining
+ dstmask0 = ((1ULL << count) - 1) << (index % 64);
+ dstmask1 = (dstshift1 == 64) ? 0ULL : (((1ULL << count) - 1) >> (64 - (index % 64)));
+ bm[bmi] = (bm[bmi] & ~dstmask0) | ((~0ULL << dstshift0) & dstmask0);
+ if ((count > (64 - dstshift0)) && dstmask1)
+ bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((~0ULL >> dstshift1) & dstmask1);
+ }
+}
+
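+// Clear `count' bits within bitmap `bm', starting at bit index `index'.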
+static void
+bitmap_clear_range(uint64_t *bm, int64_t index, int64_t count)
+{
+ int dstshift0, dstshift1;
+ uint64_t dstmask0, dstmask1;
+ int64_t bmi;
+
+ dstshift0 = index % 64;
+ dstshift1 = 64 - (index % 64);
+ dstmask0 = ~0ULL << (index % 64);
+ dstmask1 = (dstshift1 == 64) ? 0ULL : (~0ULL >> (64 - (index % 64)));
+
+ bmi = index / 64;
+ while (count >= 64) {
+ bm[bmi] = (bm[bmi] & ~dstmask0) | ((0ULL << dstshift0) & dstmask0);
+ if (dstmask1)
+ bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((0ULL >> dstshift1) & dstmask1);
+ bmi++;
+ count -= 64;
+ }
+ if (count) {
+ // adjust dstmask to cover just the bits remaining
+ dstmask0 = ((1ULL << count) - 1) << (index % 64);
+ dstmask1 = (dstshift1 == 64) ? 0ULL : (((1ULL << count) - 1) >> (64 - (index % 64)));
+ bm[bmi] = (bm[bmi] & ~dstmask0) | ((0ULL << dstshift0) & dstmask0);
+ if ((count > (64 - dstshift0)) && dstmask1)
+ bm[bmi + 1] = (bm[bmi + 1] & ~dstmask1) | ((0ULL >> dstshift1) & dstmask1);
+ }
+}
+
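+// Count trailing zeros; unlike __builtin_ctzll(), this is defined to return 64 for 0.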
+static int
+ctzll(uint64_t val)
+{
+ return (val == 0) ? 64 : __builtin_ctzll(val);
+}
+
+// search forwards through range and return index of first "bit" (0 or 1) encountered
+static int
+bitmap_range_find_first(int bit, const uint64_t *bm, int64_t index, int64_t count, int64_t *clear_index)
+{
+ int64_t bmi, check_count;
+ uint64_t val;
+ int pos;
+
+ bmi = index / 64;
+ while (count > 0) {
+ check_count = 64 - (index % 64);
+ if (check_count > count)
+ check_count = count;
+ val = bm[bmi] >> (index % 64);
+ if (!bit)
+ val = ~val;
+ pos = ctzll(val);
+ if (pos < check_count) {
+ *clear_index = index + pos;
+ return 1;
+ }
+ index += check_count;
+ count -= check_count;
+ bmi++;
+ }
+ return 0;
+}
+
+/*
+ * Search for the first free (clear) bit within `info's free xattr fext bitmap.
+ * Return false if no bits are clear.
+ * If a clear bit is found, its index is written to `*out' and true is returned.
+ */
+static bool
+xbm_find_free(const xattr_io_info_t *info, size_t *out)
+{
+ return bm_find(info->free_bm, 0, xbm_size(info), false, out);
+}
+
+/*
+ * Set the bit at index `i' within `info's underlying free xattr fext bitmap.
+ *
+ * info->lock must be held.
+ * It only makes sense to operate on one bit at a time, so this wraps
+ * bitmap_set_range().
+ */
+static void
+xbm_set_used(xattr_io_info_t *info, size_t i)
+{
+ xattr_assert(xbm_valid_index(info, i));
+ bitmap_set_range(info->free_bm, i, 1);
+}
+
+/*
+ * Clear the bit at index `i' within `info's underlying free xattr fext bitmap.
+ * This is the opposite of xbm_set_used().
+ */
+static void
+xbm_clear_used(xattr_io_info_t *info, size_t i)
+{
+ xattr_assert(xbm_valid_index(info, i));
+ bitmap_clear_range(info->free_bm, i, 1);
+}
+
+/*
+ * Return whether the given bitmap index is a valid index into `info's free
+ * bitmap.
+ */
+static bool
+xbm_valid_index(const xattr_io_info_t *info, int64_t i)
+{
+ return bm_valid_index(i, xbm_size(info));
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
+#endif
+
+/*
+ * Return the total number of *bits* in `info's free xattr fext bitmap.
+ */
+static size_t
+xbm_size(const xattr_io_info_t *info)
+{
+ // one bit per xattr fext
+ return ARRAY_SIZE(info->xattr_fexts);
+}
+
+/*
+ * Factory for an iterator over all the set (true value) bits in `bitmap'.
+ * `len' is the length in *bits* of `bitmap'.
+ *
+ * The iterator is created in an uninitialized state; a call to xbm_iter_next()
+ * is required to find the first set bit (this is different from
+ * make_fext_iterator()). As a consequence of this, an iterator may iterate
+ * zero times if no bits within `bitmap' are set. After each successful call to
+ * xbm_iter_next(), xbm_iter_peek() will return the zero-based index of each
+ * set bit.
+ *
+ * The intended use is along the lines of:
+ * uint64_t *bm = ...; // a bitmap 123 bits (two uint64_ts) long
+ *	xbm_iter_t iter = xbm_make_iter(bm, 123);
+ * while(xbm_iter_next(&iter)) {
+ * size_t i = xbm_iter_peek(&iter);
+ * printf("index %lu is set\n", i);
+ * }
+ *
+ * In terms of the iterator internals, we hold the invariant that a valid
+ * iterator always has `i' in [0, len). A valid iterator is one for which
+ * xbm_iter_peek() will return an index in range of `bitmap'. To bootstrap the
+ * first iteration, `i' is set to SIZE_MAX; further details are in
+ * xbm_iter_next().
+ */
+static xbm_iter_t
+xbm_make_iter(const uint64_t *bitmap, size_t len)
+{
+ xattr_assert(len);
+ return (xbm_iter_t) {
+ .bitmap = bitmap,
+ .i = SIZE_MAX, // This will overflow-to-zero on the first call to xbm_iter_next()
+ .len = len
+ };
+}
+
+/*
+ * Advance `iter' to internally reference the next set bit within the bitmap.
+ * If there are no more set bits, this returns false.
+ *
+ * On success, the index of the next set bit can be retrieved by
+ * xbm_iter_peek().
+ *
+ * Internally, this searches for the first bit set at bit index >= iter->i+1.
+ * For a new iterator, the value of `i' is initialized to SIZE_MAX so `i'+1
+ * will unsigned integer overflow (which is well defined) to zero.
+ */
+static bool
+xbm_iter_next(xbm_iter_t *iter)
+{
+ size_t i;
+ // find the next set bit > `i'
+ const bool found = bm_find(iter->bitmap, iter->i + 1, iter->len, true, &i);
+
+ // if no bit is found, invalidate the iterator by setting i=len
+ iter->i = found ? i : iter->len;
+ return found;
+}
+
+/*
+ * Return the index of the set bit found by the most recent successful call
+ * to xbm_iter_next().
+ */
+static size_t
+xbm_iter_peek(const xbm_iter_t *iter)
+{
+ xattr_assert(iter->i < iter->len);
+ return iter->i;
+}
+
+/*
+ * Search bitmap `bm' for the first bit with `value' at a bit index in
+ * [from, len), where `len' is the total length of the bitmap in *bits*.
+ * Whether such a bit exists is returned and, if so, its bit index is written
+ * via `out'.
+ *
+ * This is just a fancy wrapper around bitmap_range_find_first().
+ */
+static bool
+bm_find(const uint64_t *bm, size_t from, size_t len, bool value, size_t *out)
+{
+ xattr_assert(bm_valid_index(from, len));
+
+	// search for `value' in [from, len); note that bitmap_range_find_first()
+	// takes a bit count, not an end index
+	int64_t i;
+	if (!bitmap_range_find_first(value,
+				const_cast(uint64_t *, bm), from, len - from, &i)) {
+ return false;
+ }
+
+ // some bit found; check the returned index is valid
+ xattr_assert(bm_valid_index(i, len));
+ *out = (size_t)i;
+ return true;
+}
+
+/*
+ * Return true if `i' is a valid index into a bitmap of `len' bits.
+ *
+ * The underlying bitmap_ routines operate on `int64_t' indices. This is
+ * mainly to safely convert to `size_t'.
+ */
+static bool
+bm_valid_index(int64_t i, size_t len)
+{
+ return (i >= 0) && ((uint64_t)i < len);
+}
+
+/*
+ * Virtualize `uio' offset to target xattr `fext' before a call to
+ * cluster_xattr().
+ *
+ * The computation of the IO offset is somewhat subtle. The reason for this
+ * fact largely has to do with how exactly the single xattr vnode
+ * (hfsmp->hfs_attrdata_vp) caches data for multiple xattrs. First,
+ * some discussion on the motivation for the single xattr vnode design. At the
+ * top level, xattr apis are quite different from normal file data apis. Some
+ * key properties are:
+ * - xattr IO apis do not support editing or random reads
+ * - xattrs may not be mmapped
+ * - any one file may have an arbitrary number of xattrs
+ * To contrast with a normal file, each file has a corresponding vnode which in
+ * turn has its own private ubc. The only way in which xattrs are actually like
+ * files is that they have the same size limits and, in terms of their
+ * implementation, they use the same on-disk structures.
+ * The result of this is that one vnode per xattr would be too much overhead,
+ * so we instead maintain a disk block-type cache for xattr data.
+ * This cache is implemented as the ubc of a virtual file known as the single
+ * xattr vnode. Reads and writes are serviced by the cluster layer. The cluster
+ * layer operates in units of the vm pagesize. On a system for which
+ * pagesize > fs blocksize, the naïve approach of using an identity
+ * mapping between ubc logical offset and device offset poses a challenge.
+ * Consider the following scenario:
+ * - 16k vm pagesize
+ * - 4k fs blocksize
+ *
+ * On disk, we have two xattrs that reside on adjacent blocks:
+ * xattr A xattr B
+ * [aaaa|aaaa|bbbb|bbbb]
+ * pbn 4 5 6 7 8
+ *
+ * Suppose we want to just read xattr A -- pbn 4 and 5 -- the cluster layer can
+ * issue just an 8k IO, but it will store it in memory as a whole page, and we
+ * end up with
+ *
+ * xattr A xattr B
+ * in memory:
+ * [aaaa|aaaa|0000|0000]
+ *
+ * on disk:
+ * [aaaa|aaaa|bbbb|bbbb]
+ * pbn 4 5 6 7 8
+ *
+ * A subsequent read for pbn 6 or 7, as a part of xattr B, will find the page
+ * already cached in the ubc and erroneously return zeros.
+ * Instead, we could have the cluster layer issue the full 16k IO, but then we
+ * run into encryption issues on per-file (really per-xattr) key volumes
+ *
+ * xattr A xattr B
+ * in memory:
+ * [aaaa|aaaa|O!#W|JF%R]
+ *
+ * on disk:
+ * [asdf|ghjk|ZXCV|BNM,] encrypted
+ * [aaaa|aaaa|bbbb|bbbb] unencrypted
+ * pbn 4 5 6 7 8
+ *
+ * In this case, the issue is that we have the crypto state available to read
+ * xattr A, but we do not have the crypto state for xattr B, so we would
+ * incorrectly decrypt pbn 6, 7 in the same IO.
+ *
+ * The solution to this is to use a scaled mapping between ubc logical offset
+ * and device offset. In this case, we use
+ * logical offset = physical block number * pagesize
+ * and the result looks like
+ *
+ * xattr A xattr B
+ * in memory:
+ * [aaaa|aaaa|0000|0000] ... [bbbb|bbbb|0000|0000]
+ * 64k 96k
+ *
+ * on disk:
+ * [asdf|ghjk|ZXCV|BNM,] encrypted
+ * [aaaa|aaaa|bbbb|bbbb] unencrypted
+ * pbn 4 5 6 7 8
+ *
+ * In memory, xattr A occupies the virtual range of [64k, 96k), but its
+ * contents are representable in the first 8k out of [64k, 80k). Note that the
+ * mapping here is not per block but rather per xattr file extent. The contents
+ * tracked by an individual file extent are logically contiguous in memory. In
+ * the example above, xattr A has one file extent spanning [0, 8k). Suppose it
+ * instead had two file extents -- [0, 4k) at pbn 4 and [4k, 8k) at pbn 5 --
+ * the above diagram would instead look like
+ * in memory:
+ * [aaaa|0000|0000|0000][aaaa|0000|0000|0000]
+ * 64k 80k
+ * on disk:
+ * [aaaa|aaaa] unencrypted
+ * pbn 4 5
+ *
+ * This scaled mapping approach guarantees that xattrs are always on different
+ * pages from other xattrs, but it comes at an increased memory cost for
+ * non-page multiple sized xattrs.
+ * --
+ *
+ * If `fext' is not representable as a virtual offset (e.g. its startBlock
+ * is corrupt), this function returns false.
+ */
+static bool
+uio_set_fext(uio_t uio, const HFSPlusExtentDescriptor *fext, uint32_t fs_bsize)
+{
+ uint64_t off;
+ if (!fext2cluster_check(fext, fs_bsize, &off)) {
+ // `fext' is out of range
+ return false;
+ }
+
+ uio_setoffset(uio, off);
+ return true;
+}
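+
+/*
+ * e.g. (as used by read_attr_data()/write_attr_data() below):
+ *
+ *	if (!uio_set_fext(uio, &extents[i], blksize)) {
+ *		result = EILSEQ;	// corrupt extent
+ *		break;
+ *	}
+ */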
+#endif // NEW_XATTR
/*
* Read an extent based attribute.
*/
read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents)
{
vnode_t evp = hfsmp->hfs_attrdata_vp;
+ off_t filesize;
int bufsize;
int64_t iosize;
int attrsize;
bufsize = (int)uio_resid(uio);
attrsize = (int)datasize;
blksize = (int)hfsmp->blockSize;
+ filesize = VTOF(evp)->ff_size;
+#if NEW_XATTR
+ // allocate an xattr fext for tls through the cluster layer
+ const HFSPlusExtentDescriptor **xattr_fext;
+ if (!(xattr_fext = xattr_fext_alloc(&hfsmp->hfs_xattr_io))) {
+ result = ENOMEM;
+ goto exit;
+ }
+#endif
/*
* Read the attribute data one extent at a time.
* For the typical case there is only one extent.
iosize = MIN(iosize, attrsize);
iosize = MIN(iosize, bufsize);
uio_setresid(uio, iosize);
+#if NEW_XATTR
+ // virtualize the IO offset to target this fext
+ if (!uio_set_fext(uio, &extents[i], blksize)) {
+ // `fext' is corrupted
+ result = EILSEQ;
+ break;
+ }
+
+ // stage the next xattr fext for IO
+ xattr_fext_set(&hfsmp->hfs_xattr_io, xattr_fext, &extents[i]);
+
+ // Set filesize to end of data read to prevent cluster read-ahead
+ filesize = uio_offset(uio) + iosize;
+#else
uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize);
+#endif
-	result = cluster_read(evp, uio, VTOF(evp)->ff_size, IO_SYNC | IO_UNIT);
+	result = cluster_read(evp, uio, filesize, IO_SYNC | IO_UNIT);
+#if NEW_XATTR
+ // post IO, unstage this xattr fext
+ xattr_fext_clear(&hfsmp->hfs_xattr_io, xattr_fext);
+#endif
#if HFS_XATTR_VERBOSE
printf("hfs: read_attr_data: cr iosize %lld [%d, %d] (%d)\n",
uio_setresid(uio, bufsize);
uio_setoffset(uio, datasize);
+#if NEW_XATTR
+ xattr_fext_free(&hfsmp->hfs_xattr_io, xattr_fext);
+
+exit:
+#endif
hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT);
return (result);
}
/*
* Write an extent based attribute.
*/
-__unused static int
+static int
write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents)
{
vnode_t evp = hfsmp->hfs_attrdata_vp;
blksize = (int) hfsmp->blockSize;
filesize = VTOF(evp)->ff_size;
+#if NEW_XATTR
+ // allocate an xattr fext for tls through the cluster layer
+ const HFSPlusExtentDescriptor **xattr_fext;
+ if (!(xattr_fext = xattr_fext_alloc(&hfsmp->hfs_xattr_io))) {
+ result = ENOMEM;
+ goto exit;
+ }
+#endif
+
/*
* Write the attribute data one extent at a time.
*/
iosize = MIN(iosize, attrsize);
iosize = MIN(iosize, bufsize);
uio_setresid(uio, iosize);
- uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize);
+#if NEW_XATTR
+ // virtualize the IO offset to target this fext
+ if (!uio_set_fext(uio, &extents[i], blksize)) {
+ // `fext' is corrupted
+ result = EILSEQ;
+ break;
+ }
+
+ // stage the next xattr fext for IO
+ xattr_fext_set(&hfsmp->hfs_xattr_io, xattr_fext, &extents[i]);
+ filesize = uio_offset(uio) + iosize;
+#else
+ uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize);
+#endif
result = cluster_write(evp, uio, filesize, filesize, filesize,
(off_t) 0, IO_SYNC | IO_UNIT);
+
+#if NEW_XATTR
+ // post IO, unstage this xattr fext
+ xattr_fext_clear(&hfsmp->hfs_xattr_io, xattr_fext);
+#endif
+
#if HFS_XATTR_VERBOSE
printf("hfs: write_attr_data: cw iosize %lld [%d, %d] (%d)\n",
iosize, extents[i].startBlock, extents[i].blockCount, result);
uio_setresid(uio, bufsize);
uio_setoffset(uio, datasize);
+#if NEW_XATTR
+ xattr_fext_free(&hfsmp->hfs_xattr_io, xattr_fext);
+
+exit:
+#endif
hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT);
return (result);
}
/*
* Allocate blocks for an extent based attribute.
*/
-__unused static int
+static int
alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, HFSPlusExtentDescriptor *extents, int *blocks)
{
int blkcnt;