X-Git-Url: https://git.saurik.com/apple/objc4.git/blobdiff_plain/13ba007ef885ec1d079cdb0e881efe5cc776a7d2..refs/heads/master:/runtime/objc-cache.mm diff --git a/runtime/objc-cache.mm b/runtime/objc-cache.mm index 7602fe0..213d147 100644 --- a/runtime/objc-cache.mm +++ b/runtime/objc-cache.mm @@ -63,14 +63,12 @@ * objc_msgSend* * cache_getImp * - * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked) - * cache_fill (acquires lock) - * cache_expand (only called from cache_fill) - * cache_create (only called from cache_expand) - * bcopy (only called from instrumented cache_expand) - * flush_caches (acquires lock) - * cache_flush (only called from cache_fill and flush_caches) - * cache_collect_free (only called from cache_expand and cache_flush) + * Cache readers/writers (hold cacheUpdateLock during access; not PC-checked) + * cache_t::copyCacheNolock (caller must hold the lock) + * cache_t::eraseNolock (caller must hold the lock) + * cache_t::collectNolock (caller must hold the lock) + * cache_t::insert (acquires lock) + * cache_t::destroy (acquires lock) * * UNPROTECTED cache readers (NOT thread-safe; used for debug info only) * cache_print @@ -84,19 +82,96 @@ #if __OBJC2__ #include "objc-private.h" -#include "objc-cache.h" +#if TARGET_OS_OSX +#include +#include +#endif + +#if __arm__ || __x86_64__ || __i386__ + +// objc_msgSend has few registers available. +// Cache scan increments and wraps at special end-marking bucket. +#define CACHE_END_MARKER 1 + +// Historical fill ratio of 75% (since the new objc runtime was introduced). +static inline mask_t cache_fill_ratio(mask_t capacity) { + return capacity * 3 / 4; +} + +#elif __arm64__ && !__LP64__ + +// objc_msgSend has lots of registers available. +// Cache scan decrements. No end marker needed. +#define CACHE_END_MARKER 0 + +// Historical fill ratio of 75% (since the new objc runtime was introduced). +static inline mask_t cache_fill_ratio(mask_t capacity) { + return capacity * 3 / 4; +} + +#elif __arm64__ && __LP64__ + +// objc_msgSend has lots of registers available. +// Cache scan decrements. No end marker needed. +#define CACHE_END_MARKER 0 + +// Allow 87.5% fill ratio in the fast path for all cache sizes. +// Increasing the cache fill ratio reduces the fragmentation and wasted space +// in imp-caches at the cost of potentially increasing the average lookup of +// a selector in imp-caches by increasing collision chains. Another potential +// change is that cache table resizes / resets happen at different moments. +static inline mask_t cache_fill_ratio(mask_t capacity) { + return capacity * 7 / 8; +} + +// Allow 100% cache utilization for smaller cache sizes. This has the same +// advantages and disadvantages as the fill ratio. A very large percentage +// of caches end up with very few entries and the worst case of collision +// chains in small tables is relatively small. +// NOTE: objc_msgSend properly handles a cache lookup with a full cache. +#define CACHE_ALLOW_FULL_UTILIZATION 1 + +#else +#error unknown architecture +#endif /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */ enum { +#if CACHE_END_MARKER || (__arm64__ && !__LP64__) + // When we have a cache end marker it fills a bucket slot, so having a + // initial cache size of 2 buckets would not be efficient when one of the + // slots is always filled with the end marker. So start with a cache size + // 4 buckets. 
INIT_CACHE_SIZE_LOG2 = 2, - INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2) +#else + // Allow an initial bucket size of 2 buckets, since a large number of + // classes, especially metaclasses, have very few imps, and we support + // the ability to fill 100% of the cache before resizing. + INIT_CACHE_SIZE_LOG2 = 1, +#endif + INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2), + MAX_CACHE_SIZE_LOG2 = 16, + MAX_CACHE_SIZE = (1 << MAX_CACHE_SIZE_LOG2), + FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3, + FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2), }; -static void cache_collect_free(struct bucket_t *data, mask_t capacity); static int _collecting_in_critical(void); static void _garbage_make_room(void); +#if DEBUG_TASK_THREADS +static kern_return_t objc_task_threads +( + task_t target_task, + thread_act_array_t *act_list, + mach_msg_type_number_t *act_listCnt +); +#endif + +#if DEBUG_TASK_THREADS +#undef HAVE_TASK_RESTARTABLE_RANGES +#endif /*********************************************************************** * Cache statistics for OBJC_PRINT_CACHE_SETUP @@ -147,56 +222,42 @@ asm("\n .section __TEXT,__const" "\n .globl __objc_empty_vtable" "\n .set __objc_empty_vtable, 0" "\n .globl __objc_empty_cache" +#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4 + "\n .align 4" + "\n L__objc_empty_cache: .space " stringize2(EMPTY_BYTES) + "\n .set __objc_empty_cache, L__objc_empty_cache + 0xf" +#else "\n .align 3" "\n __objc_empty_cache: .space " stringize2(EMPTY_BYTES) +#endif ); +#if CONFIG_USE_PREOPT_CACHES +__attribute__((used, section("__DATA_CONST,__objc_scoffs"))) +uintptr_t objc_opt_offsets[__OBJC_OPT_OFFSETS_COUNT]; +#endif -#if __arm__ || __x86_64__ || __i386__ -// objc_msgSend has few registers available. -// Cache scan increments and wraps at special end-marking bucket. -#define CACHE_END_MARKER 1 +#if CACHE_END_MARKER static inline mask_t cache_next(mask_t i, mask_t mask) { return (i+1) & mask; } - #elif __arm64__ -// objc_msgSend has lots of registers available. -// Cache scan decrements. No end marker needed. -#define CACHE_END_MARKER 0 static inline mask_t cache_next(mask_t i, mask_t mask) { return i ? i-1 : mask; } - #else -#error unknown architecture +#error unexpected configuration #endif -// copied from dispatch_atomic_maximally_synchronizing_barrier -// fixme verify that this barrier hack does in fact work here -#if __x86_64__ -#define mega_barrier() \ - do { unsigned long _clbr; __asm__ __volatile__( \ - "cpuid" \ - : "=a" (_clbr) : "0" (0) : "rbx", "rcx", "rdx", "cc", "memory" \ - ); } while(0) - -#elif __i386__ -#define mega_barrier() \ - do { unsigned long _clbr; __asm__ __volatile__( \ - "cpuid" \ - : "=a" (_clbr) : "0" (0) : "ebx", "ecx", "edx", "cc", "memory" \ - ); } while(0) - -#elif __arm__ || __arm64__ +// mega_barrier doesn't really work, but it works enough on ARM that +// we leave well enough alone and keep using it there. 
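// Illustrative sketch (standalone C++): a simplified model of the fill-ratio
// policy described above, mirroring the constants in this diff
// (INIT_CACHE_SIZE, MAX_CACHE_SIZE, FULL_UTILIZATION_CACHE_SIZE, the 3/4 vs
// 7/8 ratios, and the end-marker slot). The helper names are hypothetical;
// in the real runtime the full-utilization branch only exists on LP64 arm64.
#include <cstdint>
#include <cstdio>

typedef uint32_t mask_t;

enum {
    kInitCacheSize            = 4,        // 2 on LP64 arm64 (no end marker)
    kMaxCacheSize             = 1 << 16,
    kFullUtilizationCacheSize = 1 << 3,
    kEndMarkerSlots           = 1,        // 0 on arm64
};

static mask_t fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;              // 7/8 on LP64 arm64
}

// Decide what capacity an insert would leave the table at, following the
// same order of checks as cache_t::insert() later in this diff.
static mask_t capacity_after_insert(mask_t capacity, mask_t newOccupied) {
    if (capacity == 0) return kInitCacheSize;                     // empty cache: allocate
    if (newOccupied + kEndMarkerSlots <= fill_ratio(capacity))
        return capacity;                                          // under the fill ratio
    if (capacity <= kFullUtilizationCacheSize &&
        newOccupied + kEndMarkerSlots <= capacity)
        return capacity;                                          // small table: 100% allowed
    mask_t grown = capacity * 2;                                  // grow, clamped at the max
    return grown > kMaxCacheSize ? kMaxCacheSize : grown;
}

int main() {
    for (mask_t occ = 1; occ <= 9; occ++)
        printf("capacity 8, occupied %u -> %u\n", occ, capacity_after_insert(8, occ));
    return 0;
}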
+#if __arm__ #define mega_barrier() \ __asm__ __volatile__( \ "dsb ish" \ : : : "memory") -#else -#error unknown architecture #endif #if __arm64__ @@ -245,44 +306,41 @@ ldp(uintptr_t& onep, uintptr_t& twop, const void *srcp) static inline mask_t cache_hash(SEL sel, mask_t mask) { - return (mask_t)(uintptr_t)sel & mask; -} - -cache_t *getCache(Class cls) -{ - assert(cls); - return &cls->cache; + uintptr_t value = (uintptr_t)sel; +#if CONFIG_USE_PREOPT_CACHES + value ^= value >> 7; +#endif + return (mask_t)(value & mask); } #if __arm64__ -template -void bucket_t::set(SEL newSel, IMP newImp) +template +void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls) { - assert(_sel == 0 || _sel == newSel); + ASSERT(_sel.load(memory_order_relaxed) == 0 || + _sel.load(memory_order_relaxed) == newSel); static_assert(offsetof(bucket_t,_imp) == 0 && offsetof(bucket_t,_sel) == sizeof(void *), "bucket_t layout doesn't match arm64 bucket_t::set()"); - uintptr_t signedImp = signIMP(newImp, newSel); + uintptr_t encodedImp = (impEncoding == Encoded + ? encodeImp(base, newImp, newSel, cls) + : (uintptr_t)newImp); - if (atomicity == Atomic) { - // LDP/STP guarantees that all observers get - // either imp/sel or newImp/newSel - stp(signedImp, (uintptr_t)newSel, this); - } else { - _sel = newSel; - _imp = signedImp; - } + // LDP/STP guarantees that all observers get + // either imp/sel or newImp/newSel + stp(encodedImp, (uintptr_t)newSel, this); } #else -template -void bucket_t::set(SEL newSel, IMP newImp) +template +void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls) { - assert(_sel == 0 || _sel == newSel); + ASSERT(_sel.load(memory_order_relaxed) == 0 || + _sel.load(memory_order_relaxed) == newSel); // objc_msgSend uses sel and imp with no locks. // It is safe for objc_msgSend to see new imp but NULL sel @@ -290,17 +348,198 @@ void bucket_t::set(SEL newSel, IMP newImp) // It is unsafe for objc_msgSend to see old imp and new sel. // Therefore we write new imp, wait a lot, then write new sel. - _imp = (uintptr_t)newImp; - - if (_sel != newSel) { - if (atomicity == Atomic) { + uintptr_t newIMP = (impEncoding == Encoded + ? encodeImp(base, newImp, newSel, cls) + : (uintptr_t)newImp); + + if (atomicity == Atomic) { + _imp.store(newIMP, memory_order_relaxed); + + if (_sel.load(memory_order_relaxed) != newSel) { +#ifdef __arm__ mega_barrier(); + _sel.store(newSel, memory_order_relaxed); +#elif __x86_64__ || __i386__ + _sel.store(newSel, memory_order_release); +#else +#error Don't know how to do bucket_t::set on this architecture. +#endif + } + } else { + _imp.store(newIMP, memory_order_relaxed); + _sel.store(newSel, memory_order_relaxed); + } +} + +#endif + +void cache_t::initializeToEmpty() +{ + _bucketsAndMaybeMask.store((uintptr_t)&_objc_empty_cache, std::memory_order_relaxed); + _originalPreoptCache.store(nullptr, std::memory_order_relaxed); +} + +#if CONFIG_USE_PREOPT_CACHES +/* + * The shared cache builder will sometimes have prebuilt an IMP cache + * for the class and left a `preopt_cache_t` pointer in _originalPreoptCache. + * + * However we have this tension: + * - when the class is realized it has to have a cache that can't resolve any + * selector until the class is properly initialized so that every + * caller falls in the slowpath and synchronizes with the class initializing, + * - we need to remember that cache pointer and we have no space for that. 
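// Illustrative sketch (standalone C++): the ordering contract bucket_t::set()
// relies on -- publish the new imp before the new sel, so a reader that
// matches sel never sees a stale imp, while "new imp, zero sel" is harmless.
// ToyBucket is a stand-in type: it uses plain std::atomic release/acquire
// instead of arm64 stp/ldp, mega_barrier, or pointer signing.
#include <atomic>
#include <cstdint>

struct ToyBucket {
    std::atomic<uintptr_t> imp{0};
    std::atomic<uintptr_t> sel{0};

    // Writer: store imp first, then publish sel with release semantics.
    void set(uintptr_t newSel, uintptr_t newImp) {
        imp.store(newImp, std::memory_order_relaxed);
        sel.store(newSel, std::memory_order_release);
    }

    // Reader (what objc_msgSend does conceptually): load sel with acquire
    // semantics; if it matches, the imp loaded afterwards is at least as
    // new as the one stored before that sel was published.
    uintptr_t lookup(uintptr_t wantedSel) const {
        if (sel.load(std::memory_order_acquire) == wantedSel)
            return imp.load(std::memory_order_relaxed);
        return 0;   // miss: the caller falls through to the slow path
    }
};

int main() {
    ToyBucket b;
    b.set(/*sel*/ 0x1000, /*imp*/ 0x2000);
    return b.lookup(0x1000) == 0x2000 ? 0 : 1;
}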
+ * + * The caches are designed so that preopt_cache::bit_one is set to 1, + * so we "disguise" the pointer so that it looks like a cache of capacity 1 + * where that bit one aliases with where the top bit of a SEL in the bucket_t + * would live: + * + * +----------------+----------------+ + * | IMP | SEL | << a bucket_t + * +----------------+----------------+--------------... + * preopt_cache_t >>| 1| ... + * +----------------+--------------... + * + * The shared cache guarantees that there's valid memory to read under "IMP" + * + * This lets us encode the original preoptimized cache pointer during + * initialization, and we can reconstruct its original address and install + * it back later. + */ +void cache_t::initializeToPreoptCacheInDisguise(const preopt_cache_t *cache) +{ + // preopt_cache_t::bit_one is 1 which sets the top bit + // and is never set on any valid selector + + uintptr_t value = (uintptr_t)cache + sizeof(preopt_cache_t) - + (bucket_t::offsetOfSel() + sizeof(SEL)); + + _originalPreoptCache.store(nullptr, std::memory_order_relaxed); + setBucketsAndMask((bucket_t *)value, 0); + _occupied = cache->occupied; +} + +void cache_t::maybeConvertToPreoptimized() +{ + const preopt_cache_t *cache = disguised_preopt_cache(); + + if (cache == nil) { + return; + } + + if (!cls()->allowsPreoptCaches() || + (cache->has_inlines && !cls()->allowsPreoptInlinedSels())) { + if (PrintCaches) { + _objc_inform("CACHES: %sclass %s: dropping cache (from %s)", + cls()->isMetaClass() ? "meta" : "", + cls()->nameForLogging(), "setInitialized"); } - _sel = newSel; + return setBucketsAndMask(emptyBuckets(), 0); } + + uintptr_t value = (uintptr_t)&cache->entries; +#if __has_feature(ptrauth_calls) + value = (uintptr_t)ptrauth_sign_unauthenticated((void *)value, + ptrauth_key_process_dependent_data, (uintptr_t)cls()); +#endif + value |= preoptBucketsHashParams(cache) | preoptBucketsMarker; + _bucketsAndMaybeMask.store(value, memory_order_relaxed); + _occupied = cache->occupied; +} + +void cache_t::initializeToEmptyOrPreoptimizedInDisguise() +{ + if (os_fastpath(!DisablePreoptCaches)) { + if (!objc::dataSegmentsRanges.inSharedCache((uintptr_t)this)) { + if (dyld_shared_cache_some_image_overridden()) { + // If the system has roots, then we must disable preoptimized + // caches completely. If a class in another image has a + // superclass in the root, the offset to the superclass will + // be wrong. 
rdar://problem/61601961 + cls()->setDisallowPreoptCachesRecursively("roots"); + } + return initializeToEmpty(); + } + + auto cache = _originalPreoptCache.load(memory_order_relaxed); + if (cache) { + return initializeToPreoptCacheInDisguise(cache); + } + } + + return initializeToEmpty(); } +const preopt_cache_t *cache_t::preopt_cache() const +{ + auto addr = _bucketsAndMaybeMask.load(memory_order_relaxed); + addr &= preoptBucketsMask; +#if __has_feature(ptrauth_calls) +#if __BUILDING_OBJCDT__ + addr = (uintptr_t)ptrauth_strip((preopt_cache_entry_t *)addr, + ptrauth_key_process_dependent_data); +#else + addr = (uintptr_t)ptrauth_auth_data((preopt_cache_entry_t *)addr, + ptrauth_key_process_dependent_data, (uintptr_t)cls()); +#endif #endif + return (preopt_cache_t *)(addr - sizeof(preopt_cache_t)); +} + +const preopt_cache_t *cache_t::disguised_preopt_cache() const +{ + bucket_t *b = buckets(); + if ((intptr_t)b->sel() >= 0) return nil; + + uintptr_t value = (uintptr_t)b + bucket_t::offsetOfSel() + sizeof(SEL); + return (preopt_cache_t *)(value - sizeof(preopt_cache_t)); +} + +Class cache_t::preoptFallbackClass() const +{ + return (Class)((uintptr_t)cls() + preopt_cache()->fallback_class_offset); +} + +bool cache_t::isConstantOptimizedCache(bool strict, uintptr_t empty_addr) const +{ + uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed); + if (addr & preoptBucketsMarker) { + return true; + } + if (strict) { + return false; + } + return mask() == 0 && addr != empty_addr; +} + +bool cache_t::shouldFlush(SEL sel, IMP imp) const +{ + // This test isn't backwards: disguised caches aren't "strict" + // constant optimized caches + if (!isConstantOptimizedCache(/*strict*/true)) { + const preopt_cache_t *cache = disguised_preopt_cache(); + if (cache) { + uintptr_t offs = (uintptr_t)sel - (uintptr_t)@selector(🤯); + uintptr_t slot = ((offs >> cache->shift) & cache->mask); + auto &entry = cache->entries[slot]; + + return entry.sel_offs == offs && + (uintptr_t)cls() - entry.imp_offs == + (uintptr_t)ptrauth_strip(imp, ptrauth_key_function_pointer); + } + } + + return cache_getImp(cls(), sel) == imp; +} + +bool cache_t::isConstantOptimizedCacheWithInlinedSels() const +{ + return isConstantOptimizedCache(/* strict */true) && preopt_cache()->has_inlines; +} +#endif // CONFIG_USE_PREOPT_CACHES + +#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) { @@ -311,83 +550,135 @@ void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) // Therefore we write new buckets, wait a lot, then write new mask. // objc_msgSend reads mask first, then buckets. +#ifdef __arm__ // ensure other threads see buckets contents before buckets pointer mega_barrier(); - _buckets = newBuckets; - + _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_relaxed); + // ensure other threads see new buckets before new mask mega_barrier(); - - _mask = newMask; + + _maybeMask.store(newMask, memory_order_relaxed); _occupied = 0; +#elif __x86_64__ || i386 + // ensure other threads see buckets contents before buckets pointer + _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release); + + // ensure other threads see new buckets before new mask + _maybeMask.store(newMask, memory_order_release); + _occupied = 0; +#else +#error Don't know how to do setBucketsAndMask on this architecture. 
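// Illustrative sketch (standalone C++): the pointer arithmetic behind the
// "preopt cache in disguise" scheme above, using simplified stand-in structs
// (ToyBucket/ToyHeader are hypothetical; their layouts and fields differ from
// the real bucket_t and preopt_cache_t). The point is only that disguise()
// and undisguise() are exact inverses, so the original header address can be
// recovered from what looks like a one-bucket array.
#include <cassert>
#include <cstddef>
#include <cstdint>

struct ToyBucket { uintptr_t imp; uintptr_t sel; };                      // sel is the last word
struct ToyHeader { uint32_t fallback_class_offset; uint32_t occupied; }; // stands in for preopt_cache_t

// Disguise: point just past the header, minus the offset of the bucket's sel
// word, so the header's top bits alias where a bucket's sel would live.
static uintptr_t disguise(const ToyHeader *h) {
    return (uintptr_t)h + sizeof(ToyHeader)
         - (offsetof(ToyBucket, sel) + sizeof(uintptr_t));
}

// Undisguise: step from the fake bucket back to the header. The real code
// first checks that the aliased sel word has its top bit set
// (preopt_cache_t::bit_one) before trusting the disguise.
static const ToyHeader *undisguise(uintptr_t fakeBuckets) {
    uintptr_t end = fakeBuckets + offsetof(ToyBucket, sel) + sizeof(uintptr_t);
    return (const ToyHeader *)(end - sizeof(ToyHeader));
}

int main() {
    ToyHeader h = {0, 3};
    assert(undisguise(disguise(&h)) == &h);
    return 0;
}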
+#endif } +mask_t cache_t::mask() const +{ + return _maybeMask.load(memory_order_relaxed); +} + +#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 || CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS -struct bucket_t *cache_t::buckets() +void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) { - return _buckets; + uintptr_t buckets = (uintptr_t)newBuckets; + uintptr_t mask = (uintptr_t)newMask; + + ASSERT(buckets <= bucketsMask); + ASSERT(mask <= maxMask); + + _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_relaxed); + _occupied = 0; } -mask_t cache_t::mask() +mask_t cache_t::mask() const { - return _mask; + uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed); + return maskAndBuckets >> maskShift; } -mask_t cache_t::occupied() +#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4 + +void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) { - return _occupied; + uintptr_t buckets = (uintptr_t)newBuckets; + unsigned mask = (unsigned)newMask; + + ASSERT(buckets == (buckets & bucketsMask)); + ASSERT(mask <= 0xffff); + + _bucketsAndMaybeMask.store(buckets | objc::mask16ShiftBits(mask), memory_order_relaxed); + _occupied = 0; + + ASSERT(this->buckets() == newBuckets); + ASSERT(this->mask() == newMask); } -void cache_t::incrementOccupied() +mask_t cache_t::mask() const { - _occupied++; + uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed); + uintptr_t maskShift = (maskAndBuckets & maskMask); + return 0xffff >> maskShift; } -void cache_t::initializeToEmpty() +#else +#error Unknown cache mask storage type. +#endif + +struct bucket_t *cache_t::buckets() const { - bzero(this, sizeof(*this)); - _buckets = (bucket_t *)&_objc_empty_cache; + uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed); + return (bucket_t *)(addr & bucketsMask); } +mask_t cache_t::occupied() const +{ + return _occupied; +} -mask_t cache_t::capacity() +void cache_t::incrementOccupied() { - return mask() ? mask()+1 : 0; + _occupied++; } +unsigned cache_t::capacity() const +{ + return mask() ? mask()+1 : 0; +} -#if CACHE_END_MARKER +Class cache_t::cls() const +{ + return (Class)((uintptr_t)this - offsetof(objc_class, cache)); +} -size_t cache_t::bytesForCapacity(uint32_t cap) +size_t cache_t::bytesForCapacity(uint32_t cap) { - // fixme put end marker inline when capacity+1 malloc is inefficient - return sizeof(bucket_t) * (cap + 1); + return sizeof(bucket_t) * cap; } -bucket_t *cache_t::endMarker(struct bucket_t *b, uint32_t cap) +#if CACHE_END_MARKER + +bucket_t *cache_t::endMarker(struct bucket_t *b, uint32_t cap) { - // bytesForCapacity() chooses whether the end marker is inline or not return (bucket_t *)((uintptr_t)b + bytesForCapacity(cap)) - 1; } -bucket_t *allocateBuckets(mask_t newCapacity) +bucket_t *cache_t::allocateBuckets(mask_t newCapacity) { // Allocate one extra bucket to mark the end of the list. // This can't overflow mask_t because newCapacity is a power of 2. - // fixme instead put the end mark inline when +1 is malloc-inefficient - bucket_t *newBuckets = (bucket_t *) - calloc(cache_t::bytesForCapacity(newCapacity), 1); + bucket_t *newBuckets = (bucket_t *)calloc(bytesForCapacity(newCapacity), 1); - bucket_t *end = cache_t::endMarker(newBuckets, newCapacity); + bucket_t *end = endMarker(newBuckets, newCapacity); #if __arm__ // End marker's sel is 1 and imp points BEFORE the first bucket. // This saves an instruction in objc_msgSend. 
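// Illustrative sketch (standalone C++): packing the buckets pointer and the
// mask into a single word, as the HIGH_16 and LOW_4 storage variants above
// do. The constants here (a 48-bit pointer field, a 16-bit mask, 16-byte
// bucket alignment) are assumptions for the demo, not the runtime's exact
// masks, and the empty cache (mask 0) is special-cased in the real code.
#include <cassert>
#include <cstdint>

static const uintptr_t kBucketsMask = ((uintptr_t)1 << 48) - 1;  // low 48 bits
static const int       kMaskShift   = 48;

// HIGH_16 style: mask lives in the top 16 bits, pointer in the low 48.
static uintptr_t packHigh16(uintptr_t buckets, uint16_t mask) {
    assert((buckets & ~kBucketsMask) == 0);
    return ((uintptr_t)mask << kMaskShift) | buckets;
}
static uintptr_t bucketsOf(uintptr_t packed) { return packed & kBucketsMask; }
static uint16_t  maskOf(uintptr_t packed)    { return (uint16_t)(packed >> kMaskShift); }

// LOW_4 style: buckets are 16-byte aligned, so the low 4 bits are free.
// Store a shift there and recover the mask as 0xffff >> shift, which works
// because the mask is always of the form 2^n - 1.
static uintptr_t packLow4(uintptr_t buckets, uint16_t mask) {
    assert((buckets & 0xf) == 0);
    assert(mask != 0 && (mask & (mask + 1u)) == 0);  // 2^n - 1
    unsigned shift = 0;
    while ((0xffffu >> shift) != mask) shift++;
    return buckets | shift;
}
static uint16_t maskOfLow4(uintptr_t packed) { return (uint16_t)(0xffffu >> (packed & 0xf)); }

int main() {
    uintptr_t p = packHigh16(0x100000, 0x3f);
    assert(bucketsOf(p) == 0x100000 && maskOf(p) == 0x3f);
    assert(maskOfLow4(packLow4(0x100000, 0x3f)) == 0x3f);
    return 0;
}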
- end->set((SEL)(uintptr_t)1, (IMP)(newBuckets - 1)); + end->set(newBuckets, (SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil); #else // End marker's sel is 1 and imp points to the first bucket. - end->set((SEL)(uintptr_t)1, (IMP)newBuckets); + end->set(newBuckets, (SEL)(uintptr_t)1, (IMP)newBuckets, nil); #endif if (PrintCaches) recordNewCache(newCapacity); @@ -397,30 +688,33 @@ bucket_t *allocateBuckets(mask_t newCapacity) #else -size_t cache_t::bytesForCapacity(uint32_t cap) -{ - return sizeof(bucket_t) * cap; -} - -bucket_t *allocateBuckets(mask_t newCapacity) +bucket_t *cache_t::allocateBuckets(mask_t newCapacity) { if (PrintCaches) recordNewCache(newCapacity); - return (bucket_t *)calloc(cache_t::bytesForCapacity(newCapacity), 1); + return (bucket_t *)calloc(bytesForCapacity(newCapacity), 1); } #endif +struct bucket_t *cache_t::emptyBuckets() +{ + return (bucket_t *)((uintptr_t)&_objc_empty_cache & bucketsMask); +} -bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true) +bucket_t *cache_t::emptyBucketsForCapacity(mask_t capacity, bool allocate) { +#if CONFIG_USE_CACHE_LOCK cacheUpdateLock.assertLocked(); +#else + runtimeLock.assertLocked(); +#endif - size_t bytes = cache_t::bytesForCapacity(capacity); + size_t bytes = bytesForCapacity(capacity); // Use _objc_empty_cache if the buckets is small enough. if (bytes <= EMPTY_BYTES) { - return (bucket_t *)&_objc_empty_cache; + return emptyBuckets(); } // Use shared empty buckets allocated on the heap. @@ -452,24 +746,21 @@ bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true) return emptyBucketsList[index]; } - -bool cache_t::isConstantEmptyCache() +bool cache_t::isConstantEmptyCache() const { - return - occupied() == 0 && + return + occupied() == 0 && buckets() == emptyBucketsForCapacity(capacity(), false); } -bool cache_t::canBeFreed() +bool cache_t::canBeFreed() const { - return !isConstantEmptyCache(); + return !isConstantEmptyCache() && !isConstantOptimizedCache(); } - -void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity) +ALWAYS_INLINE +void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld) { - bool freeOld = canBeFreed(); - bucket_t *oldBuckets = buckets(); bucket_t *newBuckets = allocateBuckets(newCapacity); @@ -477,156 +768,213 @@ void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity) // This is thought to save cache memory at the cost of extra cache fills. // fixme re-measure this - assert(newCapacity > 0); - assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1); + ASSERT(newCapacity > 0); + ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1); setBucketsAndMask(newBuckets, newCapacity - 1); if (freeOld) { - cache_collect_free(oldBuckets, oldCapacity); - cache_collect(false); + collect_free(oldBuckets, oldCapacity); } } -void cache_t::bad_cache(id receiver, SEL sel, Class isa) +void cache_t::bad_cache(id receiver, SEL sel) { // Log in separate steps in case the logging itself causes a crash. _objc_inform_now_and_on_crash ("Method cache corrupted. This may be a message to an " "invalid object, or a memory error somewhere else."); - cache_t *cache = &isa->cache; +#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED + bucket_t *b = buckets(); _objc_inform_now_and_on_crash ("%s %p, SEL %p, isa %p, cache %p, buckets %p, " "mask 0x%x, occupied 0x%x", receiver ? 
"receiver" : "unused", receiver, - sel, isa, cache, cache->_buckets, - cache->_mask, cache->_occupied); + sel, cls(), this, b, + _maybeMask.load(memory_order_relaxed), + _occupied); _objc_inform_now_and_on_crash ("%s %zu bytes, buckets %zu bytes", receiver ? "receiver" : "unused", malloc_size(receiver), - malloc_size(cache->_buckets)); + malloc_size(b)); +#elif (CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 || \ + CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS || \ + CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4) + uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed); + _objc_inform_now_and_on_crash + ("%s %p, SEL %p, isa %p, cache %p, buckets and mask 0x%lx, " + "occupied 0x%x", + receiver ? "receiver" : "unused", receiver, + sel, cls(), this, maskAndBuckets, _occupied); + _objc_inform_now_and_on_crash + ("%s %zu bytes, buckets %zu bytes", + receiver ? "receiver" : "unused", malloc_size(receiver), + malloc_size(buckets())); +#else +#error Unknown cache mask storage type. +#endif _objc_inform_now_and_on_crash ("selector '%s'", sel_getName(sel)); _objc_inform_now_and_on_crash - ("isa '%s'", isa->nameForLogging()); + ("isa '%s'", cls()->nameForLogging()); _objc_fatal ("Method cache corrupted. This may be a message to an " "invalid object, or a memory error somewhere else."); } - -bucket_t * cache_t::find(SEL s, id receiver) -{ - assert(s != 0); - - bucket_t *b = buckets(); - mask_t m = mask(); - mask_t begin = cache_hash(s, m); - mask_t i = begin; - do { - if (b[i].sel() == 0 || b[i].sel() == s) { - return &b[i]; - } - } while ((i = cache_next(i, m)) != begin); - - // hack - Class cls = (Class)((uintptr_t)this - offsetof(objc_class, cache)); - cache_t::bad_cache(receiver, (SEL)s, cls); -} - - -void cache_t::expand() +void cache_t::insert(SEL sel, IMP imp, id receiver) { - cacheUpdateLock.assertLocked(); - - uint32_t oldCapacity = capacity(); - uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE; + runtimeLock.assertLocked(); - if ((uint32_t)(mask_t)newCapacity != newCapacity) { - // mask overflow - can't grow further - // fixme this wastes one bit of mask - newCapacity = oldCapacity; + // Never cache before +initialize is done + if (slowpath(!cls()->isInitialized())) { + return; } - reallocate(oldCapacity, newCapacity); -} - - -static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver) -{ - cacheUpdateLock.assertLocked(); - - // Never cache before +initialize is done - if (!cls->isInitialized()) return; + if (isConstantOptimizedCache()) { + _objc_fatal("cache_t::insert() called with a preoptimized cache for %s", + cls()->nameForLogging()); + } - // Make sure the entry wasn't added to the cache by some other thread - // before we grabbed the cacheUpdateLock. - if (cache_getImp(cls, sel)) return; +#if DEBUG_TASK_THREADS + return _collecting_in_critical(); +#else +#if CONFIG_USE_CACHE_LOCK + mutex_locker_t lock(cacheUpdateLock); +#endif - cache_t *cache = getCache(cls); + ASSERT(sel != 0 && cls()->isInitialized()); - // Use the cache as-is if it is less than 3/4 full - mask_t newOccupied = cache->occupied() + 1; - mask_t capacity = cache->capacity(); - if (cache->isConstantEmptyCache()) { + // Use the cache as-is if until we exceed our expected fill ratio. + mask_t newOccupied = occupied() + 1; + unsigned oldCapacity = capacity(), capacity = oldCapacity; + if (slowpath(isConstantEmptyCache())) { // Cache is read-only. Replace it. 
- cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE); + if (!capacity) capacity = INIT_CACHE_SIZE; + reallocate(oldCapacity, capacity, /* freeOld */false); + } + else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) { + // Cache is less than 3/4 or 7/8 full. Use it as-is. } - else if (newOccupied <= capacity / 4 * 3) { - // Cache is less than 3/4 full. Use it as-is. +#if CACHE_ALLOW_FULL_UTILIZATION + else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) { + // Allow 100% cache utilization for small buckets. Use it as-is. } +#endif else { - // Cache is too full. Expand it. - cache->expand(); + capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; + if (capacity > MAX_CACHE_SIZE) { + capacity = MAX_CACHE_SIZE; + } + reallocate(oldCapacity, capacity, true); } + bucket_t *b = buckets(); + mask_t m = capacity - 1; + mask_t begin = cache_hash(sel, m); + mask_t i = begin; + // Scan for the first unused slot and insert there. - // There is guaranteed to be an empty slot because the - // minimum size is 4 and we resized at 3/4 full. - bucket_t *bucket = cache->find(sel, receiver); - if (bucket->sel() == 0) cache->incrementOccupied(); - bucket->set(sel, imp); + // There is guaranteed to be an empty slot. + do { + if (fastpath(b[i].sel() == 0)) { + incrementOccupied(); + b[i].set(b, sel, imp, cls()); + return; + } + if (b[i].sel() == sel) { + // The entry was added to the cache by some other thread + // before we grabbed the cacheUpdateLock. + return; + } + } while (fastpath((i = cache_next(i, m)) != begin)); + + bad_cache(receiver, (SEL)sel); +#endif // !DEBUG_TASK_THREADS } -void cache_fill(Class cls, SEL sel, IMP imp, id receiver) +void cache_t::copyCacheNolock(objc_imp_cache_entry *buffer, int len) { -#if !DEBUG_TASK_THREADS - mutex_locker_t lock(cacheUpdateLock); - cache_fill_nolock(cls, sel, imp, receiver); +#if CONFIG_USE_CACHE_LOCK + cacheUpdateLock.assertLocked(); #else - _collecting_in_critical(); - return; + runtimeLock.assertLocked(); +#endif + int wpos = 0; + +#if CONFIG_USE_PREOPT_CACHES + if (isConstantOptimizedCache()) { + auto cache = preopt_cache(); + auto mask = cache->mask; + uintptr_t sel_base = objc_opt_offsets[OBJC_OPT_METHODNAME_START]; + uintptr_t imp_base = (uintptr_t)&cache->entries; + + for (uintptr_t index = 0; index <= mask && wpos < len; index++) { + auto &ent = cache->entries[index]; + if (~ent.sel_offs) { + buffer[wpos].sel = (SEL)(sel_base + ent.sel_offs); + buffer[wpos].imp = (IMP)(imp_base - ent.imp_offs); + wpos++; + } + } + return; + } #endif + { + bucket_t *buckets = this->buckets(); + uintptr_t count = capacity(); + + for (uintptr_t index = 0; index < count && wpos < len; index++) { + if (buckets[index].sel()) { + buffer[wpos].imp = buckets[index].imp(buckets, cls()); + buffer[wpos].sel = buckets[index].sel(); + wpos++; + } + } + } } - // Reset this entire cache to the uncached lookup by reallocating it. // This must not shrink the cache - that breaks the lock-free scheme. 
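// Illustrative sketch (standalone C++): a stripped-down model of the probe
// loop in cache_t::insert() above -- hash the selector, then scan until an
// empty slot or an existing entry for the same selector is found. ToyCache
// is a hypothetical fixed-size table; it leaves out growth, locking, and the
// memory ordering that the real code handles as shown earlier in this diff.
#include <cassert>
#include <cstdint>

typedef uint32_t mask_t;

struct ToyEntry { uintptr_t sel; uintptr_t imp; };

struct ToyCache {
    static const mask_t kCapacity = 8;          // power of two
    ToyEntry entries[kCapacity] = {};
    mask_t occupied = 0;

    static mask_t hash(uintptr_t sel, mask_t mask) {
        return (mask_t)((sel ^ (sel >> 7)) & mask);  // same mixing as cache_hash()
    }
    static mask_t next(mask_t i, mask_t mask) { return (i + 1) & mask; }

    // Precondition (as in the real code): the table is grown before it fills
    // up, so an empty slot is guaranteed to exist.
    void insert(uintptr_t sel, uintptr_t imp) {
        mask_t mask = kCapacity - 1;
        mask_t begin = hash(sel, mask), i = begin;
        do {
            if (entries[i].sel == 0) {           // free slot: claim it
                entries[i] = {sel, imp};
                occupied++;
                return;
            }
            if (entries[i].sel == sel) return;   // someone else already cached it
        } while ((i = next(i, mask)) != begin);
        assert(!"no empty slot: the table should have been grown first");
    }
};

int main() {
    ToyCache c;
    c.insert(0x1000, 0xA);
    c.insert(0x1000, 0xA);       // duplicate insert is a no-op
    assert(c.occupied == 1);
    return 0;
}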
-void cache_erase_nolock(Class cls) +void cache_t::eraseNolock(const char *func) { +#if CONFIG_USE_CACHE_LOCK cacheUpdateLock.assertLocked(); +#else + runtimeLock.assertLocked(); +#endif - cache_t *cache = getCache(cls); - - mask_t capacity = cache->capacity(); - if (capacity > 0 && cache->occupied() > 0) { - auto oldBuckets = cache->buckets(); + if (isConstantOptimizedCache()) { + auto c = cls(); + if (PrintCaches) { + _objc_inform("CACHES: %sclass %s: dropping and disallowing preopt cache (from %s)", + c->isMetaClass() ? "meta" : "", + c->nameForLogging(), func); + } + setBucketsAndMask(emptyBuckets(), 0); + c->setDisallowPreoptCaches(); + } else if (occupied() > 0) { + auto capacity = this->capacity(); + auto oldBuckets = buckets(); auto buckets = emptyBucketsForCapacity(capacity); - cache->setBucketsAndMask(buckets, capacity - 1); // also clears occupied - cache_collect_free(oldBuckets, capacity); - cache_collect(false); + setBucketsAndMask(buckets, capacity - 1); // also clears occupied + collect_free(oldBuckets, capacity); } } -void cache_delete(Class cls) +void cache_t::destroy() { +#if CONFIG_USE_CACHE_LOCK mutex_locker_t lock(cacheUpdateLock); - if (cls->cache.canBeFreed()) { - if (PrintCaches) recordDeadCache(cls->cache.capacity()); - free(cls->cache.buckets()); +#else + runtimeLock.assertLocked(); +#endif + if (canBeFreed()) { + if (PrintCaches) recordDeadCache(capacity()); + free(buckets()); } } @@ -669,7 +1017,7 @@ static uintptr_t _get_pc_for_thread(thread_t thread) arm_thread_state64_t state; unsigned int count = ARM_THREAD_STATE64_COUNT; kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE64, (thread_state_t)&state, &count); - return (okay == KERN_SUCCESS) ? arm_thread_state64_get_pc(state) : PC_SENTINEL; + return (okay == KERN_SUCCESS) ? (uintptr_t)arm_thread_state64_get_pc(state) : PC_SENTINEL; } #else { @@ -686,21 +1034,64 @@ static uintptr_t _get_pc_for_thread(thread_t thread) * reading function is in progress because it might still be using * the garbage memory. **********************************************************************/ -extern "C" uintptr_t objc_entryPoints[]; -extern "C" uintptr_t objc_exitPoints[]; +#if HAVE_TASK_RESTARTABLE_RANGES +#include +#else +typedef struct { + uint64_t location; + unsigned short length; + unsigned short recovery_offs; + unsigned int flags; +} task_restartable_range_t; +#endif + +extern "C" task_restartable_range_t objc_restartableRanges[]; + +#if HAVE_TASK_RESTARTABLE_RANGES +static bool shouldUseRestartableRanges = true; +#endif + +void cache_t::init() +{ +#if HAVE_TASK_RESTARTABLE_RANGES + mach_msg_type_number_t count = 0; + kern_return_t kr; + + while (objc_restartableRanges[count].location) { + count++; + } + + kr = task_restartable_ranges_register(mach_task_self(), + objc_restartableRanges, count); + if (kr == KERN_SUCCESS) return; + _objc_fatal("task_restartable_ranges_register failed (result 0x%x: %s)", + kr, mach_error_string(kr)); +#endif // HAVE_TASK_RESTARTABLE_RANGES +} static int _collecting_in_critical(void) { #if TARGET_OS_WIN32 return TRUE; -#else +#elif HAVE_TASK_RESTARTABLE_RANGES + // Only use restartable ranges if we registered them earlier. + if (shouldUseRestartableRanges) { + kern_return_t kr = task_restartable_ranges_synchronize(mach_task_self()); + if (kr == KERN_SUCCESS) return FALSE; + _objc_fatal("task_restartable_ranges_synchronize failed (result 0x%x: %s)", + kr, mach_error_string(kr)); + } +#endif // !HAVE_TASK_RESTARTABLE_RANGES + + // Fallthrough if we didn't use restartable ranges. 
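// Illustrative sketch (standalone C++): the "reset without shrinking, free
// later" discipline used by eraseNolock() and collect_free() above, modeled
// with a plain vector as the garbage list. ToyCache and drain_garbage() are
// stand-ins; the real code installs shared read-only empty buckets and uses
// a global garbage array guarded by the runtime lock.
#include <cstdlib>
#include <vector>

struct ToyBucket { void *sel; void *imp; };

static std::vector<ToyBucket *> garbage;   // old tables not yet proven unreferenced

struct ToyCache {
    ToyBucket *buckets;
    unsigned   capacity;

    // Erase: install fresh empty buckets of the SAME capacity. Shrinking is
    // forbidden because a concurrent reader may already have loaded the old
    // mask and could index past the end of a smaller table.
    void erase() {
        ToyBucket *old = buckets;
        buckets = (ToyBucket *)calloc(capacity, sizeof(ToyBucket));
        // The old table cannot be freed yet: lock-free readers may still be
        // scanning it. Park it on the garbage list instead.
        garbage.push_back(old);
    }
};

// Later, once no thread's PC is inside the cache-reading code, the garbage
// can actually be freed (what collectNolock() does further down).
static void drain_garbage() {
    for (ToyBucket *b : garbage) free(b);
    garbage.clear();
}

int main() {
    ToyCache c = { (ToyBucket *)calloc(8, sizeof(ToyBucket)), 8 };
    c.erase();
    drain_garbage();
    free(c.buckets);
    return 0;
}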
+ thread_act_port_array_t threads; unsigned number; unsigned count; kern_return_t ret; int result; - mach_port_t mythread = pthread_mach_thread_np(pthread_self()); + mach_port_t mythread = pthread_mach_thread_np(objc_thread_self()); // Get a list of all the threads in the current task #if !DEBUG_TASK_THREADS @@ -726,7 +1117,18 @@ static int _collecting_in_critical(void) continue; // Find out where thread is executing +#if TARGET_OS_OSX + if (oah_is_current_process_translated()) { + kern_return_t ret = objc_thread_get_rip(threads[count], (uint64_t*)&pc); + if (ret != KERN_SUCCESS) { + pc = PC_SENTINEL; + } + } else { + pc = _get_pc_for_thread (threads[count]); + } +#else pc = _get_pc_for_thread (threads[count]); +#endif // Check for bad status, and if so, assume the worse (can't collect) if (pc == PC_SENTINEL) @@ -736,10 +1138,11 @@ static int _collecting_in_critical(void) } // Check whether it is in the cache lookup code - for (region = 0; objc_entryPoints[region] != 0; region++) + for (region = 0; objc_restartableRanges[region].location != 0; region++) { - if ((pc >= objc_entryPoints[region]) && - (pc <= objc_exitPoints[region])) + uint64_t loc = objc_restartableRanges[region].location; + if ((pc > loc) && + (pc - loc < (uint64_t)objc_restartableRanges[region].length)) { result = TRUE; goto done; @@ -758,7 +1161,6 @@ static int _collecting_in_critical(void) // Return our finding return result; -#endif } @@ -811,21 +1213,26 @@ static void _garbage_make_room(void) /*********************************************************************** -* cache_collect_free. Add the specified malloc'd memory to the list +* cache_t::collect_free. Add the specified malloc'd memory to the list * of them to free at some later point. * size is used for the collection threshold. It does not have to be * precisely the block's size. * Cache locks: cacheUpdateLock must be held by the caller. **********************************************************************/ -static void cache_collect_free(bucket_t *data, mask_t capacity) +void cache_t::collect_free(bucket_t *data, mask_t capacity) { +#if CONFIG_USE_CACHE_LOCK cacheUpdateLock.assertLocked(); +#else + runtimeLock.assertLocked(); +#endif if (PrintCaches) recordDeadCache(capacity); _garbage_make_room (); garbage_byte_size += cache_t::bytesForCapacity(capacity); garbage_refs[garbage_count++] = data; + cache_t::collectNolock(false); } @@ -834,9 +1241,13 @@ static void cache_collect_free(bucket_t *data, mask_t capacity) * collectALot tries harder to free memory. * Cache locks: cacheUpdateLock must be held by the caller. 
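// Illustrative sketch (standalone C++): the per-thread PC check performed
// above. A thread is "in critical" if its program counter lies inside one of
// the registered restartable ranges, i.e. strictly after the range start and
// before start + length. ToyRange mirrors only the task_restartable_range_t
// fields used here, and the sample addresses are made up for the demo.
#include <cassert>
#include <cstdint>

struct ToyRange { uint64_t location; unsigned short length; };

static bool pc_in_ranges(uint64_t pc, const ToyRange *ranges, int count) {
    for (int i = 0; i < count; i++) {
        uint64_t loc = ranges[i].location;
        if (pc > loc && pc - loc < ranges[i].length)
            return true;    // this thread may still be reading old buckets
    }
    return false;
}

int main() {
    ToyRange ranges[] = { {0x1000, 0x80}, {0x2000, 0x40} };
    assert(pc_in_ranges(0x1010, ranges, 2));
    assert(!pc_in_ranges(0x1000, ranges, 2));   // exactly at the entry point: not inside yet
    assert(!pc_in_ranges(0x3000, ranges, 2));
    return 0;
}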
**********************************************************************/ -void cache_collect(bool collectALot) +void cache_t::collectNolock(bool collectALot) { +#if CONFIG_USE_CACHE_LOCK cacheUpdateLock.assertLocked(); +#else + runtimeLock.assertLocked(); +#endif // Done if the garbage is not full if (garbage_byte_size < garbage_threshold && !collectALot) { @@ -1127,6 +1538,41 @@ static kern_return_t objc_task_threads // DEBUG_TASK_THREADS #endif +OBJC_EXPORT bucket_t * objc_cache_buckets(const cache_t * cache) { + return cache->buckets(); +} + +#if CONFIG_USE_PREOPT_CACHES + +OBJC_EXPORT const preopt_cache_t * _Nonnull objc_cache_preoptCache(const cache_t * _Nonnull cache) { + return cache->preopt_cache(); +} + +OBJC_EXPORT bool objc_cache_isConstantOptimizedCache(const cache_t * _Nonnull cache, bool strict, uintptr_t empty_addr) { + return cache->isConstantOptimizedCache(strict, empty_addr); +} + +OBJC_EXPORT unsigned objc_cache_preoptCapacity(const cache_t * _Nonnull cache) { + return cache->preopt_cache()->capacity(); +} + +OBJC_EXPORT Class _Nonnull objc_cache_preoptFallbackClass(const cache_t * _Nonnull cache) { + return cache->preoptFallbackClass(); +} + +#endif + +OBJC_EXPORT size_t objc_cache_bytesForCapacity(uint32_t cap) { + return cache_t::bytesForCapacity(cap); +} + +OBJC_EXPORT uint32_t objc_cache_occupied(const cache_t * _Nonnull cache) { + return cache->occupied(); +} + +OBJC_EXPORT unsigned objc_cache_capacity(const struct cache_t * _Nonnull cache) { + return cache->capacity(); +} // __OBJC2__ #endif
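// Illustrative sketch (standalone C++): the control flow of collectNolock()
// in this diff, as one standalone function. in_critical() and the garbage
// list are hypothetical stand-ins for _collecting_in_critical() and the real
// global garbage arrays, and the 32 KB threshold is an assumption made for
// the demo rather than a value confirmed by this diff.
#include <cstdlib>
#include <vector>

static std::vector<void *> garbage;
static size_t garbage_bytes = 0;
static const size_t kGarbageThreshold = 32 * 1024;

// Stand-in for "is any thread's PC inside objc_msgSend's cache scan?"
static bool in_critical() { return false; }

static void collect(bool collectALot) {
    // Not enough garbage to bother, and nobody is forcing a collection.
    if (garbage_bytes < kGarbageThreshold && !collectALot) return;

    // Synchronize with cache readers: bail (or spin) while some thread may
    // still be using soon-to-be-freed buckets.
    while (in_critical()) {
        if (!collectALot) return;   // ordinary fills just try again later
        // collectALot callers keep checking until it is safe
    }

    // Safe: no thread can be using the garbage any more.
    for (void *p : garbage) free(p);
    garbage.clear();
    garbage_bytes = 0;
}

int main() {
    garbage.push_back(malloc(64));
    garbage_bytes = 64;
    collect(true);                  // force a collection
    return 0;
}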