2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /***********************************************************************
26 * Method cache management
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
* Cache readers (PC-checked by collecting_in_critical())
* objc_msgSend*
* _cache_getImp
* _cache_getMethod
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward_impcache,
90 * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
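/***********************************************************************
* Illustrative sketch (not part of the runtime build): the retire-and-
* collect discipline described above, shown as a hypothetical caller.
* The helper name example_retire_cache is invented for illustration;
* _cache_expand() below follows this pattern for real.
**********************************************************************/
#if 0
static void example_retire_cache(Class cls, Cache oldCache, Cache newCache)
{
    mutex_lock(&cacheUpdateLock);

    // Readers that start after this store see only the new cache.
    cls->cache = newCache;

    // The old block may still be visible to an objc_msgSend that was
    // already inside the cache lookup, so queue it instead of freeing it.
    _cache_collect_free(oldCache, (oldCache->mask + 1) * sizeof(cache_entry *));

    // Queued garbage is actually freed only once no thread's PC is inside
    // the cache-reading routines (checked by _collecting_in_critical()).
    _cache_collect(false);

    mutex_unlock(&cacheUpdateLock);
}
#endif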
96 #include "objc-private.h"
97 #include "objc-cache-old.h"
98 #include "hashtable2.h"
typedef struct {
    SEL name;     // same layout as struct old_method
    void *unused;
    IMP imp;      // same layout as struct old_method
} cache_entry;
107 /* When _class_slow_grow is non-zero, any given cache is actually grown
108 * only on the odd-numbered times it becomes full; on the even-numbered
109 * times, it is simply emptied and re-used. When this flag is zero,
110 * caches are grown every time. */
111 static const int _class_slow_grow = 1;
113 /* For min cache size: clear_cache=1, slow_grow=1
114 For max cache size: clear_cache=0, slow_grow=0 */
116 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
118 INIT_CACHE_SIZE_LOG2 = 2,
119 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
123 /* Amount of space required for `count` hash table buckets, knowing that
124 * one entry is embedded in the cache structure itself. */
125 #define TABLE_SIZE(count) ((count - 1) * sizeof(cache_entry *))
129 # define CACHE_ALLOCATOR
132 /* Custom cache allocator parameters.
133 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
134 #define CACHE_ALLOCATOR_MIN 512
135 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
136 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
137 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
static uintptr_t cache_allocator_mask_for_size(size_t size)
{
    return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
}

static size_t cache_allocator_size_for_mask(uintptr_t mask)
{
    size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
    size_t actual = CACHE_QUANTUM;
    while (actual < requested) actual += CACHE_QUANTUM;
    return actual;
}
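/* Worked example (assuming 8-byte pointers and the two 4-byte mask/occupied
 * fields of struct objc_cache, i.e. sizeof(struct objc_cache) == 16):
 *   64 slots:  requested = 16 + TABLE_SIZE(64)  = 16 +  63*8 =  520 bytes
 *              -> exactly 1 * CACHE_QUANTUM (520), nothing wasted
 *   128 slots: requested = 16 + TABLE_SIZE(128) = 16 + 127*8 = 1032 bytes
 *              -> rounded up to 2 * CACHE_QUANTUM = 1040 bytes
 * These figures match the "ideal" vs "allocator" sizes listed in the
 * custom allocator comment further down in this file. */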
153 /* Cache instrumentation data. Immediately follows the cache block itself. */
154 #ifdef OBJC_INSTRUMENTED
157 unsigned int hitCount; // cache lookup success tally
158 unsigned int hitProbes; // sum entries checked to hit
159 unsigned int maxHitProbes; // max entries checked to hit
160 unsigned int missCount; // cache lookup no-find tally
161 unsigned int missProbes; // sum entries checked to miss
162 unsigned int maxMissProbes; // max entries checked to miss
163 unsigned int flushCount; // cache flush tally
164 unsigned int flushedEntries; // sum cache entries flushed
165 unsigned int maxFlushedEntries; // max cache entries flushed
166 } CacheInstrumentation;
168 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
171 /* Cache filling and flushing instrumentation */
173 static int totalCacheFills = 0;
175 #ifdef OBJC_INSTRUMENTED
176 unsigned int LinearFlushCachesCount = 0;
177 unsigned int LinearFlushCachesVisitedCount = 0;
178 unsigned int MaxLinearFlushCachesVisitedCount = 0;
179 unsigned int NonlinearFlushCachesCount = 0;
180 unsigned int NonlinearFlushCachesClassCount = 0;
181 unsigned int NonlinearFlushCachesVisitedCount = 0;
182 unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
183 unsigned int IdealFlushCachesCount = 0;
184 unsigned int MaxIdealFlushCachesCount = 0;
188 /***********************************************************************
189 * A static empty cache. All classes initially point at this cache.
190 * When the first message is sent it misses in the cache, and when
191 * the cache is grown it checks for this case and uses malloc rather
* than realloc. This avoids the need to check for NULL caches in the
* messenger.
***********************************************************************/
196 struct objc_cache _objc_empty_cache =
202 #ifdef OBJC_INSTRUMENTED
203 CacheInstrumentation emptyCacheInstrumentation = {0};
207 /* Local prototypes */
209 static BOOL _cache_isEmpty(Cache cache);
210 static Cache _cache_malloc(uintptr_t slotCount);
211 static Cache _cache_create(Class cls);
212 static Cache _cache_expand(Class cls);
214 static int _collecting_in_critical(void);
215 static void _garbage_make_room(void);
216 static void _cache_collect_free(void *data, size_t size);
218 #if defined(CACHE_ALLOCATOR)
219 static BOOL cache_allocator_is_block(void *block);
220 static Cache cache_allocator_calloc(size_t size);
221 static void cache_allocator_free(void *block);
224 /***********************************************************************
225 * Cache statistics for OBJC_PRINT_CACHE_SETUP
226 **********************************************************************/
227 static unsigned int cache_counts[16];
228 static size_t cache_allocations;
229 static size_t cache_collections;
230 static size_t cache_allocator_regions;
static size_t log2u(size_t x)
{
    size_t log = 0;
    while (x >>= 1) log += 1;
    return log;
}
244 /***********************************************************************
246 * Returns YES if the given cache is some empty cache.
247 * Empty caches should never be allocated on the heap.
248 **********************************************************************/
249 static BOOL _cache_isEmpty(Cache cache)
251 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
255 /***********************************************************************
258 * Called from _cache_create() and cache_expand()
259 * Cache locks: cacheUpdateLock must be held by the caller.
260 **********************************************************************/
261 static Cache _cache_malloc(uintptr_t slotCount)
266 mutex_assert_locked(&cacheUpdateLock);
268 // Allocate table (why not check for failure?)
269 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
270 #if defined(OBJC_INSTRUMENTED)
271 // Custom cache allocator can't handle instrumentation.
272 size += sizeof(CacheInstrumentation);
273 new_cache = _calloc_internal(size, 1);
274 new_cache->mask = slotCount - 1;
275 #elif !defined(CACHE_ALLOCATOR)
276 // fixme cache allocator implementation isn't 64-bit clean
277 new_cache = _calloc_internal(size, 1);
278 new_cache->mask = (unsigned int)(slotCount - 1);
280 if (size < CACHE_ALLOCATOR_MIN || UseInternalZone) {
281 new_cache = (Cache)_calloc_internal(size, 1);
282 new_cache->mask = slotCount - 1;
        // occupied and buckets and instrumentation are all zero
    }
    else {
        new_cache = cache_allocator_calloc(size);
286 // mask is already set
287 // occupied and buckets and instrumentation are all zero
292 size_t bucket = log2u(slotCount);
293 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
294 cache_counts[bucket]++;
302 /***********************************************************************
305 * Called from _cache_free() and _cache_collect_free().
306 * block may be a cache or a forward:: entry.
307 * If block is a cache, forward:: entries it points to will NOT be freed.
308 * Cache locks: cacheUpdateLock must be held by the caller.
309 **********************************************************************/
310 static inline int isPowerOf2(unsigned long l) { return 1 == __builtin_popcountl(l); }
311 static void _cache_free_block(void *block)
313 mutex_assert_locked(&cacheUpdateLock);
317 Cache cache = (Cache)block;
318 size_t slotCount = cache->mask + 1;
319 if (isPowerOf2(slotCount)) {
320 size_t bucket = log2u(slotCount);
321 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
322 cache_counts[bucket]--;
328 #if defined(CACHE_ALLOCATOR)
329 if (cache_allocator_is_block(block)) {
330 cache_allocator_free(block);
339 /***********************************************************************
342 * Called from _objc_remove_classes_in_image().
343 * forward:: entries in the cache ARE freed.
344 * Cache locks: cacheUpdateLock must NOT be held by the caller.
345 **********************************************************************/
346 void _cache_free(Cache cache)
350 mutex_lock(&cacheUpdateLock);
352 for (i = 0; i < cache->mask + 1; i++) {
353 cache_entry *entry = (cache_entry *)cache->buckets[i];
354 if (entry && entry->imp == _objc_msgForward_impcache) {
355 _cache_free_block(entry);
359 _cache_free_block(cache);
361 mutex_unlock(&cacheUpdateLock);
365 /***********************************************************************
368 * Called from _cache_expand().
369 * Cache locks: cacheUpdateLock must be held by the caller.
370 **********************************************************************/
371 static Cache _cache_create(Class cls)
375 mutex_assert_locked(&cacheUpdateLock);
377 // Allocate new cache block
378 new_cache = _cache_malloc(INIT_CACHE_SIZE);
381 cls->cache = new_cache;
383 // Clear the grow flag so that we will re-use the current storage,
384 // rather than actually grow the cache, when expanding the cache
385 // for the first time
386 if (_class_slow_grow) {
387 cls->setShouldGrowCache(false);
390 // Return our creation
395 /***********************************************************************
398 * Called from _cache_fill ()
399 * Cache locks: cacheUpdateLock must be held by the caller.
400 **********************************************************************/
401 static Cache _cache_expand(Class cls)
408 mutex_assert_locked(&cacheUpdateLock);
410 // First growth goes from empty cache to a real one
411 old_cache = cls->cache;
412 if (_cache_isEmpty(old_cache))
413 return _cache_create (cls);
415 if (_class_slow_grow) {
416 // Cache grows every other time only.
417 if (cls->shouldGrowCache()) {
418 // Grow the cache this time. Don't grow next time.
419 cls->setShouldGrowCache(false);
422 // Reuse the current cache storage this time. Do grow next time.
423 cls->setShouldGrowCache(true);
425 // Clear the valid-entry counter
426 old_cache->occupied = 0;
428 // Invalidate all the cache entries
429 for (index = 0; index < old_cache->mask + 1; index += 1)
431 // Remember what this entry was, so we can possibly
432 // deallocate it after the bucket has been invalidated
433 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
435 // Skip invalid entry
439 // Invalidate this entry
440 old_cache->buckets[index] = NULL;
442 // Deallocate "forward::" entry
443 if (oldEntry->imp == _objc_msgForward_impcache) {
444 _cache_collect_free (oldEntry, sizeof(cache_entry));
448 // Return the same old cache, freshly emptied
453 // Double the cache size
454 slotCount = (old_cache->mask + 1) << 1;
456 new_cache = _cache_malloc(slotCount);
458 #ifdef OBJC_INSTRUMENTED
459 // Propagate the instrumentation data
461 CacheInstrumentation *oldCacheData;
462 CacheInstrumentation *newCacheData;
464 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
465 newCacheData = CACHE_INSTRUMENTATION(new_cache);
466 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
470 // Deallocate "forward::" entries from the old cache
471 for (index = 0; index < old_cache->mask + 1; index++) {
472 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
473 if (entry && entry->imp == _objc_msgForward_impcache) {
474 _cache_collect_free (entry, sizeof(cache_entry));
479 cls->cache = new_cache;
481 // Deallocate old cache, try freeing all the garbage
482 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *));
483 _cache_collect(false);
489 /***********************************************************************
490 * _cache_fill. Add the specified method to the specified class' cache.
491 * Returns NO if the cache entry wasn't added: cache was busy,
492 * class is still being initialized, new entry is a duplicate.
494 * Called only from _class_lookupMethodAndLoadCache and
495 * class_respondsToMethod and _cache_addForwardEntry.
497 * Cache locks: cacheUpdateLock must not be held.
498 **********************************************************************/
499 BOOL _cache_fill(Class cls, Method smt, SEL sel)
501 uintptr_t newOccupied;
503 cache_entry **buckets;
507 mutex_assert_unlocked(&cacheUpdateLock);
509 // Never cache before +initialize is done
    if (!cls->isInitialized()) {
        return NO;
    }
514 // Keep tally of cache additions
515 totalCacheFills += 1;
517 mutex_lock(&cacheUpdateLock);
    entry = (cache_entry *)smt;

    cache = cls->cache;
523 // Make sure the entry wasn't added to the cache by some other thread
524 // before we grabbed the cacheUpdateLock.
525 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
526 // return forward:: entries.
527 if (_cache_getImp(cls, sel)) {
528 mutex_unlock(&cacheUpdateLock);
529 return NO; // entry is already cached, didn't add new one
532 // Use the cache as-is if it is less than 3/4 full
533 newOccupied = cache->occupied + 1;
534 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
535 // Cache is less than 3/4 full.
        cache->occupied = (unsigned int)newOccupied;
    }
    else {
        // Cache is too full. Expand it.
        cache = _cache_expand (cls);

        // Account for the addition
        cache->occupied += 1;
    }
545 // Scan for the first unused slot and insert there.
546 // There is guaranteed to be an empty slot because the
547 // minimum size is 4 and we resized at 3/4 full.
548 buckets = (cache_entry **)cache->buckets;
549 for (index = CACHE_HASH(sel, cache->mask);
550 buckets[index] != NULL;
551 index = (index+1) & cache->mask)
555 buckets[index] = entry;
557 mutex_unlock(&cacheUpdateLock);
559 return YES; // successfully added new cache entry
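/***********************************************************************
* Illustrative sketch (hypothetical, not part of the runtime): how a
* slow-path lookup, as described for _class_lookupMethodAndLoadCache
* above, might feed this cache. example_search_method_lists() is an
* invented stand-in for the real method-list search.
**********************************************************************/
#if 0
static IMP example_lookup_and_cache(Class cls, SEL sel)
{
    Method meth = example_search_method_lists(cls, sel);  // hypothetical
    if (meth) {
        // Cache the hit; failure is harmless because the cache is a hint.
        _cache_fill(cls, meth, sel);
        return method_getImplementation(meth);
    }
    // No implementation anywhere: remember the miss with a forward:: entry.
    _cache_addForwardEntry(cls, sel);
    return _objc_msgForward_impcache;
}
#endif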
563 /***********************************************************************
564 * _cache_addForwardEntry
565 * Add a forward:: entry for the given selector to cls's method cache.
566 * Does nothing if the cache addition fails for any reason.
567 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
568 * Cache locks: cacheUpdateLock must not be held.
569 **********************************************************************/
570 void _cache_addForwardEntry(Class cls, SEL sel)
    smt = (cache_entry *)_malloc_internal(sizeof(cache_entry));
    smt->name = sel;
    smt->imp = _objc_msgForward_impcache;
    if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
        // Entry not added to cache. Don't leak the method struct.
        _free_internal(smt);
    }
}
584 /***********************************************************************
585 * _cache_addIgnoredEntry
586 * Add an entry for the ignored selector to cls's method cache.
587 * Does nothing if the cache addition fails for any reason.
588 * Returns the ignored IMP.
589 * Cache locks: cacheUpdateLock must not be held.
590 **********************************************************************/
591 #if SUPPORT_GC && !SUPPORT_IGNORED_SELECTOR_CONSTANT
592 static cache_entry *alloc_ignored_entries(void)
594 cache_entry *e = (cache_entry *)_malloc_internal(5 * sizeof(cache_entry));
595 e[0] = (cache_entry){ @selector(retain), 0,(IMP)&_objc_ignored_method};
596 e[1] = (cache_entry){ @selector(release), 0,(IMP)&_objc_ignored_method};
597 e[2] = (cache_entry){ @selector(autorelease),0,(IMP)&_objc_ignored_method};
598 e[3] = (cache_entry){ @selector(retainCount),0,(IMP)&_objc_ignored_method};
599 e[4] = (cache_entry){ @selector(dealloc), 0,(IMP)&_objc_ignored_method};
604 IMP _cache_addIgnoredEntry(Class cls, SEL sel)
606 cache_entry *entryp = NULL;
609 _objc_fatal("selector ignored with GC off");
610 #elif SUPPORT_IGNORED_SELECTOR_CONSTANT
611 static cache_entry entry = { (SEL)kIgnore, 0, (IMP)&_objc_ignored_method };
613 assert(sel == (SEL)kIgnore);
617 static cache_entry *entries;
618 INIT_ONCE_PTR(entries, alloc_ignored_entries(), free(v));
620 assert(ignoreSelector(sel));
621 for (i = 0; i < 5; i++) {
622 if (sel == entries[i].name) {
623 entryp = &entries[i];
627 if (!entryp) _objc_fatal("selector %s (%p) is not ignored",
628 sel_getName(sel), sel);
631 _cache_fill(cls, (Method)entryp, sel);
636 /***********************************************************************
637 * _cache_flush. Invalidate all valid entries in the given class' cache.
639 * Called from flush_caches() and _cache_fill()
640 * Cache locks: cacheUpdateLock must be held by the caller.
641 **********************************************************************/
642 void _cache_flush(Class cls)
647 mutex_assert_locked(&cacheUpdateLock);
    // Locate cache. Ignore unused cache.
    cache = cls->cache;
    if (_cache_isEmpty(cache)) return;
653 #ifdef OBJC_INSTRUMENTED
655 CacheInstrumentation *cacheData;
658 cacheData = CACHE_INSTRUMENTATION(cache);
659 cacheData->flushCount += 1;
660 cacheData->flushedEntries += cache->occupied;
661 if (cache->occupied > cacheData->maxFlushedEntries)
662 cacheData->maxFlushedEntries = cache->occupied;
666 // Traverse the cache
667 for (index = 0; index <= cache->mask; index += 1)
669 // Remember what this entry was, so we can possibly
670 // deallocate it after the bucket has been invalidated
671 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
673 // Invalidate this entry
674 cache->buckets[index] = NULL;
676 // Deallocate "forward::" entry
677 if (oldEntry && oldEntry->imp == _objc_msgForward_impcache)
678 _cache_collect_free (oldEntry, sizeof(cache_entry));
    // Clear the valid-entry counter
    cache->occupied = 0;
686 /***********************************************************************
687 * flush_cache. Flushes the instance method cache for class cls only.
688 * Use flush_caches() if cls might have in-use subclasses.
689 **********************************************************************/
void flush_cache(Class cls)
{
    mutex_lock(&cacheUpdateLock);
    _cache_flush(cls);
    mutex_unlock(&cacheUpdateLock);
}
700 /***********************************************************************
702 **********************************************************************/
706 // A sentinel (magic value) to report bad thread_get_state status.
707 // Must not be a valid PC.
708 // Must not be zero - thread_get_state() on a new thread returns PC == 0.
709 #define PC_SENTINEL 1
711 // UNIX03 compliance hack (4508809)
717 static uintptr_t _get_pc_for_thread(thread_t thread)
718 #if defined(__i386__)
720 i386_thread_state_t state;
721 unsigned int count = i386_THREAD_STATE_COUNT;
722 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
723 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
725 #elif defined(__x86_64__)
727 x86_thread_state64_t state;
728 unsigned int count = x86_THREAD_STATE64_COUNT;
729 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
730 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
732 #elif defined(__arm__)
734 arm_thread_state_t state;
735 unsigned int count = ARM_THREAD_STATE_COUNT;
736 kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
737 return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
741 #error _get_pc_for_thread () not implemented for this architecture
747 /***********************************************************************
748 * _collecting_in_critical.
749 * Returns TRUE if some thread is currently executing a cache-reading
750 * function. Collection of cache garbage is not allowed when a cache-
751 * reading function is in progress because it might still be using
752 * the garbage memory.
753 **********************************************************************/
754 OBJC_EXPORT uintptr_t objc_entryPoints[];
755 OBJC_EXPORT uintptr_t objc_exitPoints[];
757 static int _collecting_in_critical(void)
762 thread_act_port_array_t threads;
768 mach_port_t mythread = pthread_mach_thread_np(pthread_self());
770 // Get a list of all the threads in the current task
771 ret = task_threads (mach_task_self (), &threads, &number);
772 if (ret != KERN_SUCCESS)
774 _objc_fatal("task_thread failed (result %d)\n", ret);
777 // Check whether any thread is in the cache lookup code
779 for (count = 0; count < number; count++)
784 // Don't bother checking ourselves
785 if (threads[count] == mythread)
788 // Find out where thread is executing
789 pc = _get_pc_for_thread (threads[count]);
        // Check for bad status, and if so, assume the worst (can't collect)
792 if (pc == PC_SENTINEL)
798 // Check whether it is in the cache lookup code
799 for (region = 0; objc_entryPoints[region] != 0; region++)
801 if ((pc >= objc_entryPoints[region]) &&
802 (pc <= objc_exitPoints[region]))
811 // Deallocate the port rights for the threads
812 for (count = 0; count < number; count++) {
813 mach_port_deallocate(mach_task_self (), threads[count]);
816 // Deallocate the thread list
817 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
819 // Return our finding
825 /***********************************************************************
826 * _garbage_make_room. Ensure that there is enough room for at least
827 * one more ref in the garbage.
828 **********************************************************************/
830 // amount of memory represented by all refs in the garbage
831 static size_t garbage_byte_size = 0;
833 // do not empty the garbage until garbage_byte_size gets at least this big
834 static size_t garbage_threshold = 1024;
836 // table of refs to free
837 static void **garbage_refs = 0;
839 // current number of refs in garbage_refs
840 static size_t garbage_count = 0;
842 // capacity of current garbage_refs
843 static size_t garbage_max = 0;
845 // capacity of initial garbage_refs
847 INIT_GARBAGE_COUNT = 128
850 static void _garbage_make_room(void)
852 static int first = 1;
854 // Create the collection table the first time it is needed
858 garbage_refs = (void**)
859 _malloc_internal(INIT_GARBAGE_COUNT * sizeof(void *));
860 garbage_max = INIT_GARBAGE_COUNT;
863 // Double the table if it is full
864 else if (garbage_count == garbage_max)
        garbage_refs = (void**)
            _realloc_internal(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2;
873 /***********************************************************************
874 * _cache_collect_free. Add the specified malloc'd memory to the list
875 * of them to free at some later point.
876 * size is used for the collection threshold. It does not have to be
877 * precisely the block's size.
878 * Cache locks: cacheUpdateLock must be held by the caller.
879 **********************************************************************/
880 static void _cache_collect_free(void *data, size_t size)
882 mutex_assert_locked(&cacheUpdateLock);
884 _garbage_make_room ();
885 garbage_byte_size += size;
886 garbage_refs[garbage_count++] = data;
890 /***********************************************************************
891 * _cache_collect. Try to free accumulated dead caches.
892 * collectALot tries harder to free memory.
893 * Cache locks: cacheUpdateLock must be held by the caller.
894 **********************************************************************/
895 void _cache_collect(bool collectALot)
897 mutex_assert_locked(&cacheUpdateLock);
899 // Done if the garbage is not full
    if (garbage_byte_size < garbage_threshold && !collectALot) {
        return;
    }
904 // Synchronize collection with objc_msgSend and other cache readers
906 if (_collecting_in_critical ()) {
907 // objc_msgSend (or other cache reader) is currently looking in
908 // the cache and might still be using some garbage.
910 _objc_inform ("CACHES: not collecting; "
911 "objc_msgSend in progress");
918 while (_collecting_in_critical())
922 // No cache readers in progress - garbage is now deletable
927 _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
930 // Dispose all refs now in the garbage
931 while (garbage_count--) {
932 _cache_free_block(garbage_refs[garbage_count]);
    // Clear the garbage count and total size indicator
    garbage_count = 0;
    garbage_byte_size = 0;
942 size_t ideal_total = 0;
943 size_t malloc_total = 0;
944 size_t local_total = 0;
946 for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
947 int count = cache_counts[i];
949 size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
952 size_t malloc = size;
954 size_t malloc = malloc_good_size(size);
956 size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
958 if (!count) continue;
960 _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
963 ideal_total += ideal*count;
964 malloc_total += malloc*count;
965 local_total += local*count;
968 _objc_inform("CACHES: total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
976 #if defined(CACHE_ALLOCATOR)
978 /***********************************************************************
979 * Custom method cache allocator.
980 * Method cache block sizes are 2^slots+2 words, which is a pessimal
981 * case for the system allocator. It wastes 504 bytes per cache block
982 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
983 * To save memory, the custom cache allocator below is used.
985 * The cache allocator uses 128 KB allocation regions. Few processes will
986 * require a second region. Within a region, allocation is address-ordered
989 * The cache allocator uses a quantum of 520.
990 * Cache block ideal sizes: 520, 1032, 2056, 4104
991 * Cache allocator sizes: 520, 1040, 2080, 4160
993 * Because all blocks are known to be genuine method caches, the ordinary
994 * cache->mask and cache->occupied fields are used as block headers.
995 * No out-of-band headers are maintained. The number of blocks will
996 * almost always be fewer than 200, so for simplicity there is no free
997 * list or other optimization.
999 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
1000 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
1002 * No cache allocator functions take any locks. Instead, the caller
1003 * must hold the cacheUpdateLock.
1005 * fixme with 128 KB regions and 520 B min block size, an allocation
1006 * bitmap would be only 32 bytes - better than free list?
1007 **********************************************************************/
typedef struct cache_allocator_block {
    uintptr_t size;
    uintptr_t state;
    struct cache_allocator_block *nextFree;
} cache_allocator_block;
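/***********************************************************************
* Illustrative sketch (not part of the runtime): how the in-band header
* distinguishes live caches from free blocks, per the state rules in the
* allocator comment above. The helper name is invented for illustration.
**********************************************************************/
#if 0
static int example_block_is_free(const cache_allocator_block *b)
{
    // A live cache reuses these words as Cache->mask and Cache->occupied;
    // a free block stores its byte size in `size` and -1 in `state`.
    return b->state == (uintptr_t)-1;
}
#endif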
1015 typedef struct cache_allocator_region {
1016 cache_allocator_block *start;
1017 cache_allocator_block *end; // first non-block address
1018 cache_allocator_block *freeList;
1019 struct cache_allocator_region *next;
1020 } cache_allocator_region;
1022 static cache_allocator_region *cacheRegion = NULL;
1025 /***********************************************************************
1026 * cache_allocator_add_region
1027 * Allocates and returns a new region that can hold at least size
1028 * bytes of large method caches.
1029 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
1030 * with a minimum of CACHE_REGION_SIZE.
1031 * The new region is lowest-priority for new allocations. Callers that
1032 * know the other regions are already full should allocate directly
1033 * into the returned region.
1034 **********************************************************************/
1035 static cache_allocator_region *cache_allocator_add_region(size_t size)
1038 cache_allocator_block *b;
1039 cache_allocator_region **rgnP;
1040 cache_allocator_region *newRegion = (cache_allocator_region *)
1041 _calloc_internal(1, sizeof(cache_allocator_region));
1043 // Round size up to quantum boundary, and apply the minimum size.
1044 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
1045 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
1047 // Allocate the region
1048 addr = (vm_address_t)calloc(size, 1);
1049 newRegion->start = (cache_allocator_block *)addr;
1050 newRegion->end = (cache_allocator_block *)(addr + size);
1052 // Mark the first block: free and covers the entire region
    b = newRegion->start;
    b->size = size;
    b->state = (uintptr_t)-1;
    b->nextFree = NULL;
    newRegion->freeList = b;
1059 // Add to end of the linked list of regions.
1060 // Other regions should be re-used before this one is touched.
1061 newRegion->next = NULL;
    rgnP = &cacheRegion;
    while (*rgnP) {
        rgnP = &(**rgnP).next;
    }
    *rgnP = newRegion;
1068 cache_allocator_regions++;
1074 /***********************************************************************
1075 * cache_allocator_coalesce
1076 * Attempts to coalesce a free block with the single free block following
1077 * it in the free list, if any.
1078 **********************************************************************/
1079 static void cache_allocator_coalesce(cache_allocator_block *block)
1081 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1082 block->size += block->nextFree->size;
1083 block->nextFree = block->nextFree->nextFree;
1088 /***********************************************************************
1089 * cache_region_calloc
1090 * Attempt to allocate a size-byte block in the given region.
1091 * Allocation is first-fit. The free list is already fully coalesced.
1092 * Returns NULL if there is not enough room in the region for the block.
1093 **********************************************************************/
1094 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1096 cache_allocator_block **blockP;
1099 // Save mask for allocated block, then round size
1100 // up to CACHE_QUANTUM boundary
1101 mask = cache_allocator_mask_for_size(size);
1102 size = cache_allocator_size_for_mask(mask);
1104 // Search the free list for a sufficiently large free block.
    for (blockP = &rgn->freeList;
         *blockP != NULL;
         blockP = &(**blockP).nextFree)
1110 cache_allocator_block *block = *blockP;
1111 if (block->size < size) continue; // not big enough
1113 // block is now big enough. Allocate from it.
1115 // Slice off unneeded fragment of block, if any,
1116 // and reconnect the free list around block.
1117 if (block->size - size >= CACHE_QUANTUM) {
1118 cache_allocator_block *leftover =
1119 (cache_allocator_block *)(size + (uintptr_t)block);
1120 leftover->size = block->size - size;
1121 leftover->state = (uintptr_t)-1;
            leftover->nextFree = block->nextFree;
            *blockP = leftover;
        } else {
            *blockP = block->nextFree;
        }
1128 // block is now exactly the right size.
1131 block->size = mask; // Cache->mask
1132 block->state = 0; // Cache->occupied
1137 // No room in this region.
1142 /***********************************************************************
1143 * cache_allocator_calloc
1144 * Custom allocator for large method caches (128+ slots)
1145 * The returned cache block already has cache->mask set.
1146 * cache->occupied and the cache contents are zero.
1147 * Cache locks: cacheUpdateLock must be held by the caller
1148 **********************************************************************/
1149 static Cache cache_allocator_calloc(size_t size)
1151 cache_allocator_region *rgn;
1153 mutex_assert_locked(&cacheUpdateLock);
1155 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
        void *p = cache_region_calloc(rgn, size);
        if (p) return (Cache)p;
1162 // No regions or all regions full - make a region and try one more time
1163 // In the unlikely case of a cache over 256KB, it will get its own region.
1164 return (Cache)cache_region_calloc(cache_allocator_add_region(size), size);
1168 /***********************************************************************
1169 * cache_allocator_region_for_block
1170 * Returns the cache allocator region that ptr points into, or NULL.
1171 **********************************************************************/
1172 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1174 cache_allocator_region *rgn;
1175 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1176 if (block >= rgn->start && block < rgn->end) return rgn;
1182 /***********************************************************************
1183 * cache_allocator_is_block
1184 * If ptr is a live block from the cache allocator, return YES
1185 * If ptr is a block from some other allocator, return NO.
1186 * If ptr is a dead block from the cache allocator, result is undefined.
1187 * Cache locks: cacheUpdateLock must be held by the caller
1188 **********************************************************************/
1189 static BOOL cache_allocator_is_block(void *ptr)
1191 mutex_assert_locked(&cacheUpdateLock);
1192 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1195 /***********************************************************************
1196 * cache_allocator_free
1197 * Frees a block allocated by the cache allocator.
1198 * Cache locks: cacheUpdateLock must be held by the caller.
1199 **********************************************************************/
1200 static void cache_allocator_free(void *ptr)
1202 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1203 cache_allocator_block *cur;
1204 cache_allocator_region *rgn;
1206 mutex_assert_locked(&cacheUpdateLock);
1208 if (! (rgn = cache_allocator_region_for_block(dead))) {
1209 // free of non-pointer
1210 _objc_inform("cache_allocator_free of non-pointer %p", dead);
1214 dead->size = cache_allocator_size_for_mask(dead->size);
1215 dead->state = (uintptr_t)-1;
1217 if (!rgn->freeList || rgn->freeList > dead) {
1218 // dead block belongs at front of free list
1219 dead->nextFree = rgn->freeList;
1220 rgn->freeList = dead;
1221 cache_allocator_coalesce(dead);
1225 // dead block belongs in the middle or end of free list
1226 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1227 cache_allocator_block *ahead = cur->nextFree;
1229 if (!ahead || ahead > dead) {
1230 // cur and ahead straddle dead, OR dead belongs at end of free list
1231 cur->nextFree = dead;
1232 dead->nextFree = ahead;
1234 // coalesce into dead first in case both succeed
1235 cache_allocator_coalesce(dead);
1236 cache_allocator_coalesce(cur);
1242 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
#endif // defined(CACHE_ALLOCATOR)
1248 /***********************************************************************
1249 * Cache instrumentation and debugging
1250 **********************************************************************/
1252 #ifdef OBJC_INSTRUMENTED
1254 CACHE_HISTOGRAM_SIZE = 512
1257 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1258 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1262 /***********************************************************************
1264 **********************************************************************/
1265 static void _cache_print(Cache cache)
1270 count = cache->mask + 1;
1271 for (index = 0; index < count; index += 1) {
1272 cache_entry *entry = (cache_entry *)cache->buckets[index];
1274 if (entry->imp == _objc_msgForward_impcache)
1275 printf ("does not recognize: \n");
1276 printf ("%s\n", sel_getName(entry->name));
1282 /***********************************************************************
1283 * _class_printMethodCaches.
1284 **********************************************************************/
1285 void _class_printMethodCaches(Class cls)
1287 if (_cache_isEmpty(cls->cache)) {
1288 printf("no instance-method cache for class %s\n", cls->getName());
1290 printf("instance-method cache for class %s:\n", cls->getName());
1291 _cache_print(cls->cache);
1294 if (_cache_isEmpty(cls->ISA()->cache)) {
1295 printf("no class-method cache for class %s\n", cls->getName());
1297 printf ("class-method cache for class %s:\n", cls->getName());
1298 _cache_print(cls->ISA()->cache);
1307 /***********************************************************************
1308 * _class_printDuplicateCacheEntries.
1309 **********************************************************************/
1310 void _class_printDuplicateCacheEntries(BOOL detail)
1314 unsigned int duplicates;
1315 unsigned int index1;
1316 unsigned int index2;
1319 unsigned int isMeta;
1323 printf ("Checking for duplicate cache entries \n");
1325 // Outermost loop - iterate over all classes
1326 state = NXInitHashState (class_hash);
1328 while (NXNextHashState (class_hash, &state, (void **) &cls))
1330 // Control loop - do given class' cache, then its isa's cache
1331 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1333 // Select cache of interest and make sure it exists
            cache = (isMeta ? cls->ISA() : cls)->cache;
1335 if (_cache_isEmpty(cache))
1338 // Middle loop - check each entry in the given cache
1341 for (index1 = 0; index1 < count; index1 += 1)
1343 // Skip invalid entry
1344 if (!cache->buckets[index1])
1347 // Inner loop - check that given entry matches no later entry
1348 for (index2 = index1 + 1; index2 < count; index2 += 1)
1350 // Skip invalid entry
1351 if (!cache->buckets[index2])
1354 // Check for duplication by method name comparison
                    if (strcmp ((char *) cache->buckets[index1]->name,
                                (char *) cache->buckets[index2]->name) == 0)
1359 printf ("%s %s\n", cls->getName(), sel_getName(cache->buckets[index1]->name));
1369 printf ("duplicates = %d\n", duplicates);
1370 printf ("total cache fills = %d\n", totalCacheFills);
1374 /***********************************************************************
1376 **********************************************************************/
1377 static void PrintCacheHeader(void)
1379 #ifdef OBJC_INSTRUMENTED
1380 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1381 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1382 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1384 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1385 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1386 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1391 /***********************************************************************
1393 **********************************************************************/
1394 static void PrintCacheInfo(unsigned int cacheSize,
1395 unsigned int cacheCount,
1396 unsigned int slotsUsed,
1397 float avgUsed, unsigned int maxUsed,
1398 float avgSHit, unsigned int maxSHit,
1399 float avgSMiss, unsigned int maxSMiss
1400 #ifdef OBJC_INSTRUMENTED
1401 , unsigned int totDHits,
1403 unsigned int maxDHit,
1404 unsigned int totDMisses,
1406 unsigned int maxDMiss,
1407 unsigned int totDFlsh,
1409 unsigned int maxDFlsh
1413 #ifdef OBJC_INSTRUMENTED
1414 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1416 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1418 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1419 #ifdef OBJC_INSTRUMENTED
1420 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1427 #ifdef OBJC_INSTRUMENTED
1428 /***********************************************************************
1429 * PrintCacheHistogram. Show the non-zero entries from the specified
1431 **********************************************************************/
1432 static void PrintCacheHistogram(char *title,
1433 unsigned int *firstEntry,
1434 unsigned int entryCount)
1437 unsigned int *thisEntry;
1439 printf ("%s\n", title);
1440 printf (" Probes Tally\n");
1441 printf (" ------ -----\n");
1442 for (index = 0, thisEntry = firstEntry;
1444 index += 1, thisEntry += 1)
1446 if (*thisEntry == 0)
1449 printf (" %6d %5d\n", index, *thisEntry);
1455 /***********************************************************************
1456 * _class_printMethodCacheStatistics.
1457 **********************************************************************/
1459 #define MAX_LOG2_SIZE 32
1460 #define MAX_CHAIN_SIZE 100
1462 void _class_printMethodCacheStatistics(void)
1464 unsigned int isMeta;
1468 unsigned int totalChain;
1469 unsigned int totalMissChain;
1470 unsigned int maxChain;
1471 unsigned int maxMissChain;
1472 unsigned int classCount;
1473 unsigned int negativeEntryCount;
1474 unsigned int cacheExpandCount;
1475 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1476 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1477 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1478 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1479 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1480 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1481 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1482 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1483 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1484 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1485 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1486 #ifdef OBJC_INSTRUMENTED
1487 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1488 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1489 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1490 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1491 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1492 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1493 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1494 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1495 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1498 printf ("Printing cache statistics\n");
1500 // Outermost loop - iterate over all classes
1501 state = NXInitHashState (class_hash);
1503 negativeEntryCount = 0;
1504 cacheExpandCount = 0;
1505 while (NXNextHashState (class_hash, &state, (void **) &cls))
1510 // Control loop - do given class' cache, then its isa's cache
1511 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1515 unsigned int log2Size;
1516 unsigned int entryCount;
1518 // Select cache of interest
            cache = (isMeta ? cls->ISA() : cls)->cache;
1521 // Ignore empty cache... should we?
1522 if (_cache_isEmpty(cache))
1525 // Middle loop - do each entry in the given cache
1532 for (index = 0; index < mask + 1; index += 1)
1534 cache_entry **buckets;
1537 unsigned int methodChain;
1538 unsigned int methodMissChain;
1539 unsigned int index2;
1541 // If entry is invalid, the only item of
1542 // interest is that future insert hashes
1543 // to this entry can use it directly.
1544 buckets = (cache_entry **)cache->buckets;
1545 if (!buckets[index])
1547 missChainCount[0] += 1;
1551 entry = buckets[index];
1553 // Tally valid entries
1556 // Tally "forward::" entries
1557 if (entry->imp == _objc_msgForward_impcache)
1558 negativeEntryCount += 1;
1560 // Calculate search distance (chain length) for this method
1561 // The chain may wrap around to the beginning of the table.
1562 hash = CACHE_HASH(entry->name, mask);
1563 if (index >= hash) methodChain = index - hash;
1564 else methodChain = (mask+1) + index - hash;
1566 // Tally chains of this length
1567 if (methodChain < MAX_CHAIN_SIZE)
1568 chainCount[methodChain] += 1;
1570 // Keep sum of all chain lengths
1571 totalChain += methodChain;
1573 // Record greatest chain length
1574 if (methodChain > maxChain)
1575 maxChain = methodChain;
1577 // Calculate search distance for miss that hashes here
1579 while (buckets[index2])
1584 methodMissChain = ((index2 - index) & mask);
1586 // Tally miss chains of this length
1587 if (methodMissChain < MAX_CHAIN_SIZE)
1588 missChainCount[methodMissChain] += 1;
1590 // Keep sum of all miss chain lengths in this class
1591 totalMissChain += methodMissChain;
1593 // Record greatest miss chain length
1594 if (methodMissChain > maxMissChain)
1595 maxMissChain = methodMissChain;
1598 // Factor this cache into statistics about caches of the same
1599 // type and size (all caches are a power of two in size)
1600 log2Size = log2u (mask + 1);
1601 cacheCountBySize[isMeta][log2Size] += 1;
1602 totalEntriesBySize[isMeta][log2Size] += entryCount;
1603 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1604 maxEntriesBySize[isMeta][log2Size] = entryCount;
1605 totalChainBySize[isMeta][log2Size] += totalChain;
1606 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1607 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1608 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1609 if (maxChain > maxChainBySize[isMeta][log2Size])
1610 maxChainBySize[isMeta][log2Size] = maxChain;
1611 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1612 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1613 #ifdef OBJC_INSTRUMENTED
1615 CacheInstrumentation *cacheData;
1617 cacheData = CACHE_INSTRUMENTATION(cache);
1618 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1619 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1620 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1621 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1622 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1623 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1624 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1625 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1626 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1627 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1628 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1629 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1632 // Caches start with a power of two number of entries, and grow by doubling, so
1633 // we can calculate the number of times this cache has expanded
1634 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1639 unsigned int cacheCountByType[2] = {0};
1640 unsigned int totalCacheCount = 0;
1641 unsigned int totalEntries = 0;
1642 unsigned int maxEntries = 0;
1643 unsigned int totalSlots = 0;
1644 #ifdef OBJC_INSTRUMENTED
1645 unsigned int totalHitCount = 0;
1646 unsigned int totalHitProbes = 0;
1647 unsigned int maxHitProbes = 0;
1648 unsigned int totalMissCount = 0;
1649 unsigned int totalMissProbes = 0;
1650 unsigned int maxMissProbes = 0;
1651 unsigned int totalFlushCount = 0;
1652 unsigned int totalFlushedEntries = 0;
1653 unsigned int maxFlushedEntries = 0;
1661 // Sum information over all caches
1662 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1664 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1666 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1667 totalEntries += totalEntriesBySize[isMeta][index];
1668 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1669 totalChain += totalChainBySize[isMeta][index];
1670 if (maxEntriesBySize[isMeta][index] > maxEntries)
1671 maxEntries = maxEntriesBySize[isMeta][index];
1672 if (maxChainBySize[isMeta][index] > maxChain)
1673 maxChain = maxChainBySize[isMeta][index];
1674 totalMissChain += totalMissChainBySize[isMeta][index];
1675 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1676 maxMissChain = maxMissChainBySize[isMeta][index];
1677 #ifdef OBJC_INSTRUMENTED
1678 totalHitCount += hitCountBySize[isMeta][index];
1679 totalHitProbes += hitProbesBySize[isMeta][index];
1680 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1681 maxHitProbes = maxHitProbesBySize[isMeta][index];
1682 totalMissCount += missCountBySize[isMeta][index];
1683 totalMissProbes += missProbesBySize[isMeta][index];
1684 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1685 maxMissProbes = maxMissProbesBySize[isMeta][index];
1686 totalFlushCount += flushCountBySize[isMeta][index];
1687 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1688 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1689 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1693 totalCacheCount += cacheCountByType[isMeta];
1697 printf ("There are %u classes\n", classCount);
1699 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1701 // Number of this type of class
1702 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1703 cacheCountByType[isMeta],
1704 isMeta ? "class" : "instance");
1707 PrintCacheHeader ();
1709 // Keep format consistent even if there are caches of this kind
1710 if (cacheCountByType[isMeta] == 0)
1712 printf ("(none)\n");
1716 // Usage information by cache size
1717 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1719 unsigned int cacheCount;
1720 unsigned int cacheSlotCount;
1721 unsigned int cacheEntryCount;
1723 // Get number of caches of this type and size
1724 cacheCount = cacheCountBySize[isMeta][index];
1725 if (cacheCount == 0)
1728 // Get the cache slot count and the total number of valid entries
1729 cacheSlotCount = (1 << index);
1730 cacheEntryCount = totalEntriesBySize[isMeta][index];
1732 // Give the analysis
1733 PrintCacheInfo (cacheSlotCount,
1736 (float) cacheEntryCount / (float) cacheCount,
1737 maxEntriesBySize[isMeta][index],
1738 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1739 maxChainBySize[isMeta][index],
1740 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1741 maxMissChainBySize[isMeta][index]
1742 #ifdef OBJC_INSTRUMENTED
1743 , hitCountBySize[isMeta][index],
1744 hitCountBySize[isMeta][index] ?
1745 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1746 maxHitProbesBySize[isMeta][index],
1747 missCountBySize[isMeta][index],
1748 missCountBySize[isMeta][index] ?
1749 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1750 maxMissProbesBySize[isMeta][index],
1751 flushCountBySize[isMeta][index],
1752 flushCountBySize[isMeta][index] ?
1753 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1754 maxFlushedEntriesBySize[isMeta][index]
1760 // Give overall numbers
1761 printf ("\nCumulative:\n");
1762 PrintCacheHeader ();
1763 PrintCacheInfo (totalSlots,
1766 (float) totalEntries / (float) totalCacheCount,
1768 (float) totalChain / (float) totalEntries,
1770 (float) totalMissChain / (float) totalSlots,
1772 #ifdef OBJC_INSTRUMENTED
1775 (float) totalHitProbes / (float) totalHitCount : 0.0,
1779 (float) totalMissProbes / (float) totalMissCount : 0.0,
1783 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1788 printf ("\nNumber of \"forward::\" entries: %d\n", negativeEntryCount);
1789 printf ("Number of cache expansions: %d\n", cacheExpandCount);
1790 #ifdef OBJC_INSTRUMENTED
1791 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1792 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1793 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1794 LinearFlushCachesCount,
1795 LinearFlushCachesVisitedCount,
1796 LinearFlushCachesCount ?
1797 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1798 MaxLinearFlushCachesVisitedCount,
1799 LinearFlushCachesVisitedCount,
1801 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1802 NonlinearFlushCachesCount,
1803 NonlinearFlushCachesVisitedCount,
1804 NonlinearFlushCachesCount ?
1805 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1806 MaxNonlinearFlushCachesVisitedCount,
1807 NonlinearFlushCachesClassCount,
1808 NonlinearFlushCachesClassCount ?
1809 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1810 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1811 LinearFlushCachesCount + NonlinearFlushCachesCount,
1812 IdealFlushCachesCount,
1813 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1814 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1815 MaxIdealFlushCachesCount,
1816 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1817 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1818 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1820 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1821 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1825 printf ("\nLookup chains:");
1826 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1828 if (chainCount[index] != 0)
1829 printf (" %u:%u", index, chainCount[index]);
1832 printf ("\nMiss chains:");
1833 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1835 if (missChainCount[index] != 0)
1836 printf (" %u:%u", index, missChainCount[index]);
1839 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1840 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1841 totalSlots * sizeof(cache_entry *) +
1842 negativeEntryCount * sizeof(cache_entry));