objc4-818.2.tar.gz

[apple/objc4.git] / runtime / objc-cache.mm
diff --git a/runtime/objc-cache.mm b/runtime/objc-cache.mm

index 7602fe07e015244d99d6865ca8953ac2026e7125..213d14718d71f1b0bd020d83f5b9c1b2e67b5881 100644 (file)
--- a/runtime/objc-cache.mm
+++ b/runtime/objc-cache.mm
@@ -63,14 +63,12 @@
   * objc_msgSend*
   * cache_getImp
   *
- * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
- * cache_fill         (acquires lock)
- * cache_expand       (only called from cache_fill)
- * cache_create       (only called from cache_expand)
- * bcopy               (only called from instrumented cache_expand)
- * flush_caches        (acquires lock)
- * cache_flush        (only called from cache_fill and flush_caches)
- * cache_collect_free (only called from cache_expand and cache_flush)
+ * Cache readers/writers (hold cacheUpdateLock during access; not PC-checked)
+ * cache_t::copyCacheNolock    (caller must hold the lock)
+ * cache_t::eraseNolock        (caller must hold the lock)
+ * cache_t::collectNolock      (caller must hold the lock)
+ * cache_t::insert             (acquires lock)
+ * cache_t::destroy            (acquires lock)
   *
   * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
   * cache_print
@@ -84,19 +82,96 @@
  #if __OBJC2__
  
  #include "objc-private.h"
-#include "objc-cache.h"
  
+#if TARGET_OS_OSX
+#include <Cambria/Traps.h>
+#include <Cambria/Cambria.h>
+#endif
+
+#if __arm__  ||  __x86_64__  ||  __i386__
+
+// objc_msgSend has few registers available.
+// Cache scan increments and wraps at special end-marking bucket.
+#define CACHE_END_MARKER 1
+
+// Historical fill ratio of 75% (since the new objc runtime was introduced).
+static inline mask_t cache_fill_ratio(mask_t capacity) {
+    return capacity * 3 / 4;
+}
+
+#elif __arm64__ && !__LP64__
+
+// objc_msgSend has lots of registers available.
+// Cache scan decrements. No end marker needed.
+#define CACHE_END_MARKER 0
+
+// Historical fill ratio of 75% (since the new objc runtime was introduced).
+static inline mask_t cache_fill_ratio(mask_t capacity) {
+    return capacity * 3 / 4;
+}
+
+#elif __arm64__ && __LP64__
+
+// objc_msgSend has lots of registers available.
+// Cache scan decrements. No end marker needed.
+#define CACHE_END_MARKER 0
+
+// Allow 87.5% fill ratio in the fast path for all cache sizes.
+// Increasing the cache fill ratio reduces the fragmentation and wasted space
+// in imp-caches at the cost of potentially increasing the average lookup of
+// a selector in imp-caches by increasing collision chains. Another potential
+// change is that cache table resizes / resets happen at different moments.
+static inline mask_t cache_fill_ratio(mask_t capacity) {
+    return capacity * 7 / 8;
+}
+
+// Allow 100% cache utilization for smaller cache sizes. This has the same
+// advantages and disadvantages as the fill ratio. A very large percentage
+// of caches end up with very few entries and the worst case of collision
+// chains in small tables is relatively small.
+// NOTE: objc_msgSend properly handles a cache lookup with a full cache.
+#define CACHE_ALLOW_FULL_UTILIZATION 1
+
+#else
+#error unknown architecture
+#endif
  
  /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
  enum {
+#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
+    // When we have a cache end marker it fills a bucket slot, so having a
+    // initial cache size of 2 buckets would not be efficient when one of the
+    // slots is always filled with the end marker. So start with a cache size
+    // 4 buckets.
      INIT_CACHE_SIZE_LOG2 = 2,
-    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2)
+#else
+    // Allow an initial bucket size of 2 buckets, since a large number of
+    // classes, especially metaclasses, have very few imps, and we support
+    // the ability to fill 100% of the cache before resizing.
+    INIT_CACHE_SIZE_LOG2 = 1,
+#endif
+    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
+    MAX_CACHE_SIZE_LOG2  = 16,
+    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
+    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
+    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
  };
  
-static void cache_collect_free(struct bucket_t *data, mask_t capacity);
  static int _collecting_in_critical(void);
  static void _garbage_make_room(void);
  
+#if DEBUG_TASK_THREADS
+static kern_return_t objc_task_threads
+(
+       task_t target_task,
+       thread_act_array_t *act_list,
+       mach_msg_type_number_t *act_listCnt
+);
+#endif
+
+#if DEBUG_TASK_THREADS
+#undef HAVE_TASK_RESTARTABLE_RANGES
+#endif
  
  /***********************************************************************
  * Cache statistics for OBJC_PRINT_CACHE_SETUP
@@ -147,56 +222,42 @@ asm("\n .section __TEXT,__const"
      "\n .globl __objc_empty_vtable"
      "\n .set __objc_empty_vtable, 0"
      "\n .globl __objc_empty_cache"
+#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
+    "\n .align 4"
+    "\n L__objc_empty_cache: .space " stringize2(EMPTY_BYTES)
+    "\n .set __objc_empty_cache, L__objc_empty_cache + 0xf"
+#else
      "\n .align 3"
      "\n __objc_empty_cache: .space " stringize2(EMPTY_BYTES)
+#endif
      );
  
+#if CONFIG_USE_PREOPT_CACHES
+__attribute__((used, section("__DATA_CONST,__objc_scoffs")))
+uintptr_t objc_opt_offsets[__OBJC_OPT_OFFSETS_COUNT];
+#endif
  
-#if __arm__  ||  __x86_64__  ||  __i386__
-// objc_msgSend has few registers available.
-// Cache scan increments and wraps at special end-marking bucket.
-#define CACHE_END_MARKER 1
+#if CACHE_END_MARKER
  static inline mask_t cache_next(mask_t i, mask_t mask) {
      return (i+1) & mask;
  }
-
  #elif __arm64__
-// objc_msgSend has lots of registers available.
-// Cache scan decrements. No end marker needed.
-#define CACHE_END_MARKER 0
  static inline mask_t cache_next(mask_t i, mask_t mask) {
      return i ? i-1 : mask;
  }
-
  #else
-#error unknown architecture
+#error unexpected configuration
  #endif
  
  
-// copied from dispatch_atomic_maximally_synchronizing_barrier
-// fixme verify that this barrier hack does in fact work here
-#if __x86_64__
-#define mega_barrier() \
-    do { unsigned long _clbr; __asm__ __volatile__( \
-        "cpuid" \
-        : "=a" (_clbr) : "0" (0) : "rbx", "rcx", "rdx", "cc", "memory" \
-                                                    ); } while(0)
-
-#elif __i386__
-#define mega_barrier() \
-    do { unsigned long _clbr; __asm__ __volatile__( \
-        "cpuid" \
-        : "=a" (_clbr) : "0" (0) : "ebx", "ecx", "edx", "cc", "memory" \
-                                                    ); } while(0)
-
-#elif __arm__  ||  __arm64__
+// mega_barrier doesn't really work, but it works enough on ARM that
+// we leave well enough alone and keep using it there.
+#if __arm__
  #define mega_barrier() \
      __asm__ __volatile__( \
          "dsb    ish" \
          : : : "memory")
  
-#else
-#error unknown architecture
  #endif
  
  #if __arm64__
@@ -245,44 +306,41 @@ ldp(uintptr_t& onep, uintptr_t& twop, const void *srcp)
  
  static inline mask_t cache_hash(SEL sel, mask_t mask) 
  {
-    return (mask_t)(uintptr_t)sel & mask;
-}
-
-cache_t *getCache(Class cls) 
-{
-    assert(cls);
-    return &cls->cache;
+    uintptr_t value = (uintptr_t)sel;
+#if CONFIG_USE_PREOPT_CACHES
+    value ^= value >> 7;
+#endif
+    return (mask_t)(value & mask);
  }
  
  #if __arm64__
  
-template<Atomicity atomicity>
-void bucket_t::set(SEL newSel, IMP newImp)
+template<Atomicity atomicity, IMPEncoding impEncoding>
+void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
  {
-    assert(_sel == 0  ||  _sel == newSel);
+    ASSERT(_sel.load(memory_order_relaxed) == 0 ||
+           _sel.load(memory_order_relaxed) == newSel);
  
      static_assert(offsetof(bucket_t,_imp) == 0 &&
                    offsetof(bucket_t,_sel) == sizeof(void *),
                    "bucket_t layout doesn't match arm64 bucket_t::set()");
  
-    uintptr_t signedImp = signIMP(newImp, newSel);
+    uintptr_t encodedImp = (impEncoding == Encoded
+                            ? encodeImp(base, newImp, newSel, cls)
+                            : (uintptr_t)newImp);
  
-    if (atomicity == Atomic) {
-        // LDP/STP guarantees that all observers get
-        // either imp/sel or newImp/newSel
-        stp(signedImp, (uintptr_t)newSel, this);
-    } else {
-        _sel = newSel;
-        _imp = signedImp;
-    }
+    // LDP/STP guarantees that all observers get
+    // either imp/sel or newImp/newSel
+    stp(encodedImp, (uintptr_t)newSel, this);
  }
  
  #else
  
-template<Atomicity atomicity>
-void bucket_t::set(SEL newSel, IMP newImp)
+template<Atomicity atomicity, IMPEncoding impEncoding>
+void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
  {
-    assert(_sel == 0  ||  _sel == newSel);
+    ASSERT(_sel.load(memory_order_relaxed) == 0 ||
+           _sel.load(memory_order_relaxed) == newSel);
  
      // objc_msgSend uses sel and imp with no locks.
      // It is safe for objc_msgSend to see new imp but NULL sel
@@ -290,17 +348,198 @@ void bucket_t::set(SEL newSel, IMP newImp)
      // It is unsafe for objc_msgSend to see old imp and new sel.
      // Therefore we write new imp, wait a lot, then write new sel.
      
-    _imp = (uintptr_t)newImp;
-    
-    if (_sel != newSel) {
-        if (atomicity == Atomic) {
+    uintptr_t newIMP = (impEncoding == Encoded
+                        ? encodeImp(base, newImp, newSel, cls)
+                        : (uintptr_t)newImp);
+
+    if (atomicity == Atomic) {
+        _imp.store(newIMP, memory_order_relaxed);
+        
+        if (_sel.load(memory_order_relaxed) != newSel) {
+#ifdef __arm__
              mega_barrier();
+            _sel.store(newSel, memory_order_relaxed);
+#elif __x86_64__ || __i386__
+            _sel.store(newSel, memory_order_release);
+#else
+#error Don't know how to do bucket_t::set on this architecture.
+#endif
+        }
+    } else {
+        _imp.store(newIMP, memory_order_relaxed);
+        _sel.store(newSel, memory_order_relaxed);
+    }
+}
+
+#endif
+
+void cache_t::initializeToEmpty()
+{
+    _bucketsAndMaybeMask.store((uintptr_t)&_objc_empty_cache, std::memory_order_relaxed);
+    _originalPreoptCache.store(nullptr, std::memory_order_relaxed);
+}
+
+#if CONFIG_USE_PREOPT_CACHES
+/*
+ * The shared cache builder will sometimes have prebuilt an IMP cache
+ * for the class and left a `preopt_cache_t` pointer in _originalPreoptCache.
+ *
+ * However we have this tension:
+ * - when the class is realized it has to have a cache that can't resolve any
+ *   selector until the class is properly initialized so that every
+ *   caller falls in the slowpath and synchronizes with the class initializing,
+ * - we need to remember that cache pointer and we have no space for that.
+ *
+ * The caches are designed so that preopt_cache::bit_one is set to 1,
+ * so we "disguise" the pointer so that it looks like a cache of capacity 1
+ * where that bit one aliases with where the top bit of a SEL in the bucket_t
+ * would live:
+ *
+ * +----------------+----------------+
+ * |      IMP       |       SEL      | << a bucket_t
+ * +----------------+----------------+--------------...
+ * preopt_cache_t >>|               1| ...
+ *                  +----------------+--------------...
+ *
+ * The shared cache guarantees that there's valid memory to read under "IMP"
+ *
+ * This lets us encode the original preoptimized cache pointer during
+ * initialization, and we can reconstruct its original address and install
+ * it back later.
+ */
+void cache_t::initializeToPreoptCacheInDisguise(const preopt_cache_t *cache)
+{
+    // preopt_cache_t::bit_one is 1 which sets the top bit
+    // and is never set on any valid selector
+
+    uintptr_t value = (uintptr_t)cache + sizeof(preopt_cache_t) -
+            (bucket_t::offsetOfSel() + sizeof(SEL));
+
+    _originalPreoptCache.store(nullptr, std::memory_order_relaxed);
+    setBucketsAndMask((bucket_t *)value, 0);
+    _occupied = cache->occupied;
+}
+
+void cache_t::maybeConvertToPreoptimized()
+{
+    const preopt_cache_t *cache = disguised_preopt_cache();
+
+    if (cache == nil) {
+        return;
+    }
+
+    if (!cls()->allowsPreoptCaches() ||
+            (cache->has_inlines && !cls()->allowsPreoptInlinedSels())) {
+        if (PrintCaches) {
+            _objc_inform("CACHES: %sclass %s: dropping cache (from %s)",
+                         cls()->isMetaClass() ? "meta" : "",
+                         cls()->nameForLogging(), "setInitialized");
          }
-        _sel = newSel;
+        return setBucketsAndMask(emptyBuckets(), 0);
      }
+
+    uintptr_t value = (uintptr_t)&cache->entries;
+#if __has_feature(ptrauth_calls)
+    value = (uintptr_t)ptrauth_sign_unauthenticated((void *)value,
+            ptrauth_key_process_dependent_data, (uintptr_t)cls());
+#endif
+    value |= preoptBucketsHashParams(cache) | preoptBucketsMarker;
+    _bucketsAndMaybeMask.store(value, memory_order_relaxed);
+    _occupied = cache->occupied;
+}
+
+void cache_t::initializeToEmptyOrPreoptimizedInDisguise()
+{
+    if (os_fastpath(!DisablePreoptCaches)) {
+        if (!objc::dataSegmentsRanges.inSharedCache((uintptr_t)this)) {
+            if (dyld_shared_cache_some_image_overridden()) {
+                // If the system has roots, then we must disable preoptimized
+                // caches completely. If a class in another image has a
+                // superclass in the root, the offset to the superclass will
+                // be wrong. rdar://problem/61601961
+                cls()->setDisallowPreoptCachesRecursively("roots");
+            }
+            return initializeToEmpty();
+        }
+
+        auto cache = _originalPreoptCache.load(memory_order_relaxed);
+        if (cache) {
+            return initializeToPreoptCacheInDisguise(cache);
+        }
+    }
+
+    return initializeToEmpty();
  }
  
+const preopt_cache_t *cache_t::preopt_cache() const
+{
+    auto addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    addr &= preoptBucketsMask;
+#if __has_feature(ptrauth_calls)
+#if __BUILDING_OBJCDT__
+    addr = (uintptr_t)ptrauth_strip((preopt_cache_entry_t *)addr,
+            ptrauth_key_process_dependent_data);
+#else
+    addr = (uintptr_t)ptrauth_auth_data((preopt_cache_entry_t *)addr,
+            ptrauth_key_process_dependent_data, (uintptr_t)cls());
+#endif
  #endif
+    return (preopt_cache_t *)(addr - sizeof(preopt_cache_t));
+}
+
+const preopt_cache_t *cache_t::disguised_preopt_cache() const
+{
+    bucket_t *b = buckets();
+    if ((intptr_t)b->sel() >= 0) return nil;
+
+    uintptr_t value = (uintptr_t)b + bucket_t::offsetOfSel() + sizeof(SEL);
+    return (preopt_cache_t *)(value - sizeof(preopt_cache_t));
+}
+
+Class cache_t::preoptFallbackClass() const
+{
+    return (Class)((uintptr_t)cls() + preopt_cache()->fallback_class_offset);
+}
+
+bool cache_t::isConstantOptimizedCache(bool strict, uintptr_t empty_addr) const
+{
+    uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    if (addr & preoptBucketsMarker) {
+        return true;
+    }
+    if (strict) {
+        return false;
+    }
+    return mask() == 0 && addr != empty_addr;
+}
+
+bool cache_t::shouldFlush(SEL sel, IMP imp) const
+{
+    // This test isn't backwards: disguised caches aren't "strict"
+    // constant optimized caches
+    if (!isConstantOptimizedCache(/*strict*/true)) {
+        const preopt_cache_t *cache = disguised_preopt_cache();
+        if (cache) {
+            uintptr_t offs = (uintptr_t)sel - (uintptr_t)@selector(🤯);
+            uintptr_t slot = ((offs >> cache->shift) & cache->mask);
+            auto &entry = cache->entries[slot];
+
+            return entry.sel_offs == offs &&
+                (uintptr_t)cls() - entry.imp_offs ==
+                (uintptr_t)ptrauth_strip(imp, ptrauth_key_function_pointer);
+        }
+    }
+
+    return cache_getImp(cls(), sel) == imp;
+}
+
+bool cache_t::isConstantOptimizedCacheWithInlinedSels() const
+{
+    return isConstantOptimizedCache(/* strict */true) && preopt_cache()->has_inlines;
+}
+#endif // CONFIG_USE_PREOPT_CACHES
+
+#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
  
  void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
  {
@@ -311,83 +550,135 @@ void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
      // Therefore we write new buckets, wait a lot, then write new mask.
      // objc_msgSend reads mask first, then buckets.
  
+#ifdef __arm__
      // ensure other threads see buckets contents before buckets pointer
      mega_barrier();
  
-    _buckets = newBuckets;
-    
+    _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_relaxed);
+
      // ensure other threads see new buckets before new mask
      mega_barrier();
-    
-    _mask = newMask;
+
+    _maybeMask.store(newMask, memory_order_relaxed);
      _occupied = 0;
+#elif __x86_64__ || i386
+    // ensure other threads see buckets contents before buckets pointer
+    _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release);
+
+    // ensure other threads see new buckets before new mask
+    _maybeMask.store(newMask, memory_order_release);
+    _occupied = 0;
+#else
+#error Don't know how to do setBucketsAndMask on this architecture.
+#endif
  }
  
+mask_t cache_t::mask() const
+{
+    return _maybeMask.load(memory_order_relaxed);
+}
+
+#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 || CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
  
-struct bucket_t *cache_t::buckets() 
+void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
  {
-    return _buckets; 
+    uintptr_t buckets = (uintptr_t)newBuckets;
+    uintptr_t mask = (uintptr_t)newMask;
+
+    ASSERT(buckets <= bucketsMask);
+    ASSERT(mask <= maxMask);
+
+    _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_relaxed);
+    _occupied = 0;
  }
  
-mask_t cache_t::mask() 
+mask_t cache_t::mask() const
  {
-    return _mask; 
+    uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    return maskAndBuckets >> maskShift;
  }
  
-mask_t cache_t::occupied() 
+#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
+
+void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
  {
-    return _occupied;
+    uintptr_t buckets = (uintptr_t)newBuckets;
+    unsigned mask = (unsigned)newMask;
+
+    ASSERT(buckets == (buckets & bucketsMask));
+    ASSERT(mask <= 0xffff);
+
+    _bucketsAndMaybeMask.store(buckets | objc::mask16ShiftBits(mask), memory_order_relaxed);
+    _occupied = 0;
+
+    ASSERT(this->buckets() == newBuckets);
+    ASSERT(this->mask() == newMask);
  }
  
-void cache_t::incrementOccupied() 
+mask_t cache_t::mask() const
  {
-    _occupied++;
+    uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    uintptr_t maskShift = (maskAndBuckets & maskMask);
+    return 0xffff >> maskShift;
  }
  
-void cache_t::initializeToEmpty()
+#else
+#error Unknown cache mask storage type.
+#endif
+
+struct bucket_t *cache_t::buckets() const
  {
-    bzero(this, sizeof(*this));
-    _buckets = (bucket_t *)&_objc_empty_cache;
+    uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    return (bucket_t *)(addr & bucketsMask);
  }
  
+mask_t cache_t::occupied() const
+{
+    return _occupied;
+}
  
-mask_t cache_t::capacity() 
+void cache_t::incrementOccupied() 
  {
-    return mask() ? mask()+1 : 0; 
+    _occupied++;
  }
  
+unsigned cache_t::capacity() const
+{
+    return mask() ? mask()+1 : 0; 
+}
  
-#if CACHE_END_MARKER
+Class cache_t::cls() const
+{
+    return (Class)((uintptr_t)this - offsetof(objc_class, cache));
+}
  
-size_t cache_t::bytesForCapacity(uint32_t cap) 
+size_t cache_t::bytesForCapacity(uint32_t cap)
  {
-    // fixme put end marker inline when capacity+1 malloc is inefficient
-    return sizeof(bucket_t) * (cap + 1);
+    return sizeof(bucket_t) * cap;
  }
  
-bucket_t *cache_t::endMarker(struct bucket_t *b, uint32_t cap) 
+#if CACHE_END_MARKER
+
+bucket_t *cache_t::endMarker(struct bucket_t *b, uint32_t cap)
  {
-    // bytesForCapacity() chooses whether the end marker is inline or not
      return (bucket_t *)((uintptr_t)b + bytesForCapacity(cap)) - 1;
  }
  
-bucket_t *allocateBuckets(mask_t newCapacity)
+bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
  {
      // Allocate one extra bucket to mark the end of the list.
      // This can't overflow mask_t because newCapacity is a power of 2.
-    // fixme instead put the end mark inline when +1 is malloc-inefficient
-    bucket_t *newBuckets = (bucket_t *)
-        calloc(cache_t::bytesForCapacity(newCapacity), 1);
+    bucket_t *newBuckets = (bucket_t *)calloc(bytesForCapacity(newCapacity), 1);
  
-    bucket_t *end = cache_t::endMarker(newBuckets, newCapacity);
+    bucket_t *end = endMarker(newBuckets, newCapacity);
  
  #if __arm__
      // End marker's sel is 1 and imp points BEFORE the first bucket.
      // This saves an instruction in objc_msgSend.
-    end->set<NotAtomic>((SEL)(uintptr_t)1, (IMP)(newBuckets - 1));
+    end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
  #else
      // End marker's sel is 1 and imp points to the first bucket.
-    end->set<NotAtomic>((SEL)(uintptr_t)1, (IMP)newBuckets);
+    end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)newBuckets, nil);
  #endif
      
      if (PrintCaches) recordNewCache(newCapacity);
@@ -397,30 +688,33 @@ bucket_t *allocateBuckets(mask_t newCapacity)
  
  #else
  
-size_t cache_t::bytesForCapacity(uint32_t cap) 
-{
-    return sizeof(bucket_t) * cap;
-}
-
-bucket_t *allocateBuckets(mask_t newCapacity)
+bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
  {
      if (PrintCaches) recordNewCache(newCapacity);
  
-    return (bucket_t *)calloc(cache_t::bytesForCapacity(newCapacity), 1);
+    return (bucket_t *)calloc(bytesForCapacity(newCapacity), 1);
  }
  
  #endif
  
+struct bucket_t *cache_t::emptyBuckets()
+{
+    return (bucket_t *)((uintptr_t)&_objc_empty_cache & bucketsMask);
+}
  
-bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true)
+bucket_t *cache_t::emptyBucketsForCapacity(mask_t capacity, bool allocate)
  {
+#if CONFIG_USE_CACHE_LOCK
      cacheUpdateLock.assertLocked();
+#else
+    runtimeLock.assertLocked();
+#endif
  
-    size_t bytes = cache_t::bytesForCapacity(capacity);
+    size_t bytes = bytesForCapacity(capacity);
  
      // Use _objc_empty_cache if the buckets is small enough.
      if (bytes <= EMPTY_BYTES) {
-        return (bucket_t *)&_objc_empty_cache;
+        return emptyBuckets();
      }
  
      // Use shared empty buckets allocated on the heap.
@@ -452,24 +746,21 @@ bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true)
      return emptyBucketsList[index];
  }
  
-
-bool cache_t::isConstantEmptyCache()
+bool cache_t::isConstantEmptyCache() const
  {
-    return 
-        occupied() == 0  &&  
+    return
+        occupied() == 0  &&
          buckets() == emptyBucketsForCapacity(capacity(), false);
  }
  
-bool cache_t::canBeFreed()
+bool cache_t::canBeFreed() const
  {
-    return !isConstantEmptyCache();
+    return !isConstantEmptyCache() && !isConstantOptimizedCache();
  }
  
-
-void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
+ALWAYS_INLINE
+void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
  {
-    bool freeOld = canBeFreed();
-
      bucket_t *oldBuckets = buckets();
      bucket_t *newBuckets = allocateBuckets(newCapacity);
  
@@ -477,156 +768,213 @@ void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
      // This is thought to save cache memory at the cost of extra cache fills.
      // fixme re-measure this
  
-    assert(newCapacity > 0);
-    assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
+    ASSERT(newCapacity > 0);
+    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
  
      setBucketsAndMask(newBuckets, newCapacity - 1);
      
      if (freeOld) {
-        cache_collect_free(oldBuckets, oldCapacity);
-        cache_collect(false);
+        collect_free(oldBuckets, oldCapacity);
      }
  }
  
  
-void cache_t::bad_cache(id receiver, SEL sel, Class isa)
+void cache_t::bad_cache(id receiver, SEL sel)
  {
      // Log in separate steps in case the logging itself causes a crash.
      _objc_inform_now_and_on_crash
          ("Method cache corrupted. This may be a message to an "
           "invalid object, or a memory error somewhere else.");
-    cache_t *cache = &isa->cache;
+#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
+    bucket_t *b = buckets();
      _objc_inform_now_and_on_crash
          ("%s %p, SEL %p, isa %p, cache %p, buckets %p, "
           "mask 0x%x, occupied 0x%x", 
           receiver ? "receiver" : "unused", receiver, 
-         sel, isa, cache, cache->_buckets, 
-         cache->_mask, cache->_occupied);
+         sel, cls(), this, b,
+         _maybeMask.load(memory_order_relaxed),
+         _occupied);
      _objc_inform_now_and_on_crash
          ("%s %zu bytes, buckets %zu bytes", 
           receiver ? "receiver" : "unused", malloc_size(receiver), 
-         malloc_size(cache->_buckets));
+         malloc_size(b));
+#elif (CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 || \
+       CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS || \
+       CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4)
+    uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
+    _objc_inform_now_and_on_crash
+        ("%s %p, SEL %p, isa %p, cache %p, buckets and mask 0x%lx, "
+         "occupied 0x%x",
+         receiver ? "receiver" : "unused", receiver,
+         sel, cls(), this, maskAndBuckets, _occupied);
+    _objc_inform_now_and_on_crash
+        ("%s %zu bytes, buckets %zu bytes",
+         receiver ? "receiver" : "unused", malloc_size(receiver),
+         malloc_size(buckets()));
+#else
+#error Unknown cache mask storage type.
+#endif
      _objc_inform_now_and_on_crash
          ("selector '%s'", sel_getName(sel));
      _objc_inform_now_and_on_crash
-        ("isa '%s'", isa->nameForLogging());
+        ("isa '%s'", cls()->nameForLogging());
      _objc_fatal
          ("Method cache corrupted. This may be a message to an "
           "invalid object, or a memory error somewhere else.");
  }
  
-
-bucket_t * cache_t::find(SEL s, id receiver)
-{
-    assert(s != 0);
-
-    bucket_t *b = buckets();
-    mask_t m = mask();
-    mask_t begin = cache_hash(s, m);
-    mask_t i = begin;
-    do {
-        if (b[i].sel() == 0  ||  b[i].sel() == s) {
-            return &b[i];
-        }
-    } while ((i = cache_next(i, m)) != begin);
-
-    // hack
-    Class cls = (Class)((uintptr_t)this - offsetof(objc_class, cache));
-    cache_t::bad_cache(receiver, (SEL)s, cls);
-}
-
-
-void cache_t::expand()
+void cache_t::insert(SEL sel, IMP imp, id receiver)
  {
-    cacheUpdateLock.assertLocked();
-    
-    uint32_t oldCapacity = capacity();
-    uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE;
+    runtimeLock.assertLocked();
  
-    if ((uint32_t)(mask_t)newCapacity != newCapacity) {
-        // mask overflow - can't grow further
-        // fixme this wastes one bit of mask
-        newCapacity = oldCapacity;
+    // Never cache before +initialize is done
+    if (slowpath(!cls()->isInitialized())) {
+        return;
      }
  
-    reallocate(oldCapacity, newCapacity);
-}
-
-
-static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver)
-{
-    cacheUpdateLock.assertLocked();
-
-    // Never cache before +initialize is done
-    if (!cls->isInitialized()) return;
+    if (isConstantOptimizedCache()) {
+        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
+                    cls()->nameForLogging());
+    }
  
-    // Make sure the entry wasn't added to the cache by some other thread 
-    // before we grabbed the cacheUpdateLock.
-    if (cache_getImp(cls, sel)) return;
+#if DEBUG_TASK_THREADS
+    return _collecting_in_critical();
+#else
+#if CONFIG_USE_CACHE_LOCK
+    mutex_locker_t lock(cacheUpdateLock);
+#endif
  
-    cache_t *cache = getCache(cls);
+    ASSERT(sel != 0 && cls()->isInitialized());
  
-    // Use the cache as-is if it is less than 3/4 full
-    mask_t newOccupied = cache->occupied() + 1;
-    mask_t capacity = cache->capacity();
-    if (cache->isConstantEmptyCache()) {
+    // Use the cache as-is if until we exceed our expected fill ratio.
+    mask_t newOccupied = occupied() + 1;
+    unsigned oldCapacity = capacity(), capacity = oldCapacity;
+    if (slowpath(isConstantEmptyCache())) {
          // Cache is read-only. Replace it.
-        cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);
+        if (!capacity) capacity = INIT_CACHE_SIZE;
+        reallocate(oldCapacity, capacity, /* freeOld */false);
+    }
+    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
+        // Cache is less than 3/4 or 7/8 full. Use it as-is.
      }
-    else if (newOccupied <= capacity / 4 * 3) {
-        // Cache is less than 3/4 full. Use it as-is.
+#if CACHE_ALLOW_FULL_UTILIZATION
+    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
+        // Allow 100% cache utilization for small buckets. Use it as-is.
      }
+#endif
      else {
-        // Cache is too full. Expand it.
-        cache->expand();
+        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
+        if (capacity > MAX_CACHE_SIZE) {
+            capacity = MAX_CACHE_SIZE;
+        }
+        reallocate(oldCapacity, capacity, true);
      }
  
+    bucket_t *b = buckets();
+    mask_t m = capacity - 1;
+    mask_t begin = cache_hash(sel, m);
+    mask_t i = begin;
+
      // Scan for the first unused slot and insert there.
-    // There is guaranteed to be an empty slot because the 
-    // minimum size is 4 and we resized at 3/4 full.
-    bucket_t *bucket = cache->find(sel, receiver);
-    if (bucket->sel() == 0) cache->incrementOccupied();
-    bucket->set<Atomic>(sel, imp);
+    // There is guaranteed to be an empty slot.
+    do {
+        if (fastpath(b[i].sel() == 0)) {
+            incrementOccupied();
+            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
+            return;
+        }
+        if (b[i].sel() == sel) {
+            // The entry was added to the cache by some other thread
+            // before we grabbed the cacheUpdateLock.
+            return;
+        }
+    } while (fastpath((i = cache_next(i, m)) != begin));
+
+    bad_cache(receiver, (SEL)sel);
+#endif // !DEBUG_TASK_THREADS
  }
  
-void cache_fill(Class cls, SEL sel, IMP imp, id receiver)
+void cache_t::copyCacheNolock(objc_imp_cache_entry *buffer, int len)
  {
-#if !DEBUG_TASK_THREADS
-    mutex_locker_t lock(cacheUpdateLock);
-    cache_fill_nolock(cls, sel, imp, receiver);
+#if CONFIG_USE_CACHE_LOCK
+    cacheUpdateLock.assertLocked();
  #else
-    _collecting_in_critical();
-    return;
+    runtimeLock.assertLocked();
+#endif
+    int wpos = 0;
+
+#if CONFIG_USE_PREOPT_CACHES
+    if (isConstantOptimizedCache()) {
+        auto cache = preopt_cache();
+        auto mask = cache->mask;
+        uintptr_t sel_base = objc_opt_offsets[OBJC_OPT_METHODNAME_START];
+        uintptr_t imp_base = (uintptr_t)&cache->entries;
+
+        for (uintptr_t index = 0; index <= mask && wpos < len; index++) {
+            auto &ent = cache->entries[index];
+            if (~ent.sel_offs) {
+                buffer[wpos].sel = (SEL)(sel_base + ent.sel_offs);
+                buffer[wpos].imp = (IMP)(imp_base - ent.imp_offs);
+                wpos++;
+            }
+        }
+        return;
+    }
  #endif
+    {
+        bucket_t *buckets = this->buckets();
+        uintptr_t count = capacity();
+
+        for (uintptr_t index = 0; index < count && wpos < len; index++) {
+            if (buckets[index].sel()) {
+                buffer[wpos].imp = buckets[index].imp(buckets, cls());
+                buffer[wpos].sel = buckets[index].sel();
+                wpos++;
+            }
+        }
+    }
  }
  
-
  // Reset this entire cache to the uncached lookup by reallocating it.
  // This must not shrink the cache - that breaks the lock-free scheme.
-void cache_erase_nolock(Class cls)
+void cache_t::eraseNolock(const char *func)
  {
+#if CONFIG_USE_CACHE_LOCK
      cacheUpdateLock.assertLocked();
+#else
+    runtimeLock.assertLocked();
+#endif
  
-    cache_t *cache = getCache(cls);
-
-    mask_t capacity = cache->capacity();
-    if (capacity > 0  &&  cache->occupied() > 0) {
-        auto oldBuckets = cache->buckets();
+    if (isConstantOptimizedCache()) {
+        auto c = cls();
+        if (PrintCaches) {
+            _objc_inform("CACHES: %sclass %s: dropping and disallowing preopt cache (from %s)",
+                         c->isMetaClass() ? "meta" : "",
+                         c->nameForLogging(), func);
+        }
+        setBucketsAndMask(emptyBuckets(), 0);
+        c->setDisallowPreoptCaches();
+    } else if (occupied() > 0) {
+        auto capacity = this->capacity();
+        auto oldBuckets = buckets();
          auto buckets = emptyBucketsForCapacity(capacity);
-        cache->setBucketsAndMask(buckets, capacity - 1); // also clears occupied
  
-        cache_collect_free(oldBuckets, capacity);
-        cache_collect(false);
+        setBucketsAndMask(buckets, capacity - 1); // also clears occupied
+        collect_free(oldBuckets, capacity);
      }
  }
  
  
-void cache_delete(Class cls)
+void cache_t::destroy()
  {
+#if CONFIG_USE_CACHE_LOCK
      mutex_locker_t lock(cacheUpdateLock);
-    if (cls->cache.canBeFreed()) {
-        if (PrintCaches) recordDeadCache(cls->cache.capacity());
-        free(cls->cache.buckets());
+#else
+    runtimeLock.assertLocked();
+#endif
+    if (canBeFreed()) {
+        if (PrintCaches) recordDeadCache(capacity());
+        free(buckets());
      }
  }
  
@@ -669,7 +1017,7 @@ static uintptr_t _get_pc_for_thread(thread_t thread)
      arm_thread_state64_t state;
      unsigned int count = ARM_THREAD_STATE64_COUNT;
      kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE64, (thread_state_t)&state, &count);
-    return (okay == KERN_SUCCESS) ? arm_thread_state64_get_pc(state) : PC_SENTINEL;
+    return (okay == KERN_SUCCESS) ? (uintptr_t)arm_thread_state64_get_pc(state) : PC_SENTINEL;
  }
  #else
  {
@@ -686,21 +1034,64 @@ static uintptr_t _get_pc_for_thread(thread_t thread)
  * reading function is in progress because it might still be using 
  * the garbage memory.
  **********************************************************************/
-extern "C" uintptr_t objc_entryPoints[];
-extern "C"  uintptr_t objc_exitPoints[];
+#if HAVE_TASK_RESTARTABLE_RANGES
+#include <kern/restartable.h>
+#else
+typedef struct {
+    uint64_t          location;
+    unsigned short    length;
+    unsigned short    recovery_offs;
+    unsigned int      flags;
+} task_restartable_range_t;
+#endif
+
+extern "C" task_restartable_range_t objc_restartableRanges[];
+
+#if HAVE_TASK_RESTARTABLE_RANGES
+static bool shouldUseRestartableRanges = true;
+#endif
+
+void cache_t::init()
+{
+#if HAVE_TASK_RESTARTABLE_RANGES
+    mach_msg_type_number_t count = 0;
+    kern_return_t kr;
+
+    while (objc_restartableRanges[count].location) {
+        count++;
+    }
+
+    kr = task_restartable_ranges_register(mach_task_self(),
+                                          objc_restartableRanges, count);
+    if (kr == KERN_SUCCESS) return;
+    _objc_fatal("task_restartable_ranges_register failed (result 0x%x: %s)",
+                kr, mach_error_string(kr));
+#endif // HAVE_TASK_RESTARTABLE_RANGES
+}
  
  static int _collecting_in_critical(void)
  {
  #if TARGET_OS_WIN32
      return TRUE;
-#else
+#elif HAVE_TASK_RESTARTABLE_RANGES
+    // Only use restartable ranges if we registered them earlier.
+    if (shouldUseRestartableRanges) {
+        kern_return_t kr = task_restartable_ranges_synchronize(mach_task_self());
+        if (kr == KERN_SUCCESS) return FALSE;
+        _objc_fatal("task_restartable_ranges_synchronize failed (result 0x%x: %s)",
+                    kr, mach_error_string(kr));
+    }
+#endif // !HAVE_TASK_RESTARTABLE_RANGES
+
+    // Fallthrough if we didn't use restartable ranges.
+
      thread_act_port_array_t threads;
      unsigned number;
      unsigned count;
      kern_return_t ret;
      int result;
  
-    mach_port_t mythread = pthread_mach_thread_np(pthread_self());
+    mach_port_t mythread = pthread_mach_thread_np(objc_thread_self());
  
      // Get a list of all the threads in the current task
  #if !DEBUG_TASK_THREADS
@@ -726,7 +1117,18 @@ static int _collecting_in_critical(void)
              continue;
  
          // Find out where thread is executing
+#if TARGET_OS_OSX
+        if (oah_is_current_process_translated()) {
+            kern_return_t ret = objc_thread_get_rip(threads[count], (uint64_t*)&pc);
+            if (ret != KERN_SUCCESS) {
+                pc = PC_SENTINEL;
+            }
+        } else {
+            pc = _get_pc_for_thread (threads[count]);
+        }
+#else
          pc = _get_pc_for_thread (threads[count]);
+#endif
  
          // Check for bad status, and if so, assume the worse (can't collect)
          if (pc == PC_SENTINEL)
@@ -736,10 +1138,11 @@ static int _collecting_in_critical(void)
          }
          
          // Check whether it is in the cache lookup code
-        for (region = 0; objc_entryPoints[region] != 0; region++)
+        for (region = 0; objc_restartableRanges[region].location != 0; region++)
          {
-            if ((pc >= objc_entryPoints[region]) &&
-                (pc <= objc_exitPoints[region])) 
+            uint64_t loc = objc_restartableRanges[region].location;
+            if ((pc > loc) &&
+                (pc - loc < (uint64_t)objc_restartableRanges[region].length))
              {
                  result = TRUE;
                  goto done;
@@ -758,7 +1161,6 @@ static int _collecting_in_critical(void)
  
      // Return our finding
      return result;
-#endif
  }
  
  
@@ -811,21 +1213,26 @@ static void _garbage_make_room(void)
  
  
  /***********************************************************************
-* cache_collect_free.  Add the specified malloc'd memory to the list
+* cache_t::collect_free.  Add the specified malloc'd memory to the list
  * of them to free at some later point.
  * size is used for the collection threshold. It does not have to be 
  * precisely the block's size.
  * Cache locks: cacheUpdateLock must be held by the caller.
  **********************************************************************/
-static void cache_collect_free(bucket_t *data, mask_t capacity)
+void cache_t::collect_free(bucket_t *data, mask_t capacity)
  {
+#if CONFIG_USE_CACHE_LOCK
      cacheUpdateLock.assertLocked();
+#else
+    runtimeLock.assertLocked();
+#endif
  
      if (PrintCaches) recordDeadCache(capacity);
  
      _garbage_make_room ();
      garbage_byte_size += cache_t::bytesForCapacity(capacity);
      garbage_refs[garbage_count++] = data;
+    cache_t::collectNolock(false);
  }
  
  
@@ -834,9 +1241,13 @@ static void cache_collect_free(bucket_t *data, mask_t capacity)
  * collectALot tries harder to free memory.
  * Cache locks: cacheUpdateLock must be held by the caller.
  **********************************************************************/
-void cache_collect(bool collectALot)
+void cache_t::collectNolock(bool collectALot)
  {
+#if CONFIG_USE_CACHE_LOCK
      cacheUpdateLock.assertLocked();
+#else
+    runtimeLock.assertLocked();
+#endif
  
      // Done if the garbage is not full
      if (garbage_byte_size < garbage_threshold  &&  !collectALot) {
@@ -1127,6 +1538,41 @@ static kern_return_t objc_task_threads
  // DEBUG_TASK_THREADS
  #endif
  
+OBJC_EXPORT bucket_t * objc_cache_buckets(const cache_t * cache) {
+    return cache->buckets();
+}
+
+#if CONFIG_USE_PREOPT_CACHES
+
+OBJC_EXPORT const preopt_cache_t * _Nonnull objc_cache_preoptCache(const cache_t * _Nonnull cache) {
+    return cache->preopt_cache();
+}
+
+OBJC_EXPORT bool objc_cache_isConstantOptimizedCache(const cache_t * _Nonnull cache, bool strict, uintptr_t empty_addr) {
+    return cache->isConstantOptimizedCache(strict, empty_addr);
+}
+
+OBJC_EXPORT unsigned objc_cache_preoptCapacity(const cache_t * _Nonnull cache) {
+    return cache->preopt_cache()->capacity();
+}
+
+OBJC_EXPORT Class _Nonnull objc_cache_preoptFallbackClass(const cache_t * _Nonnull cache) {
+    return cache->preoptFallbackClass();
+}
+
+#endif
+
+OBJC_EXPORT size_t objc_cache_bytesForCapacity(uint32_t cap) {
+    return cache_t::bytesForCapacity(cap);
+}
+
+OBJC_EXPORT uint32_t objc_cache_occupied(const cache_t * _Nonnull cache) {
+    return cache->occupied();
+}
+
+OBJC_EXPORT unsigned objc_cache_capacity(const struct cache_t * _Nonnull cache) {
+    return cache->capacity();
+}
  
  // __OBJC2__
  #endif