1 /*
2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /***********************************************************************
25 * objc-cache-old.mm
26 * Method cache management
27 * Cache flushing
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
32
33
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
36 *
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
41 *
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
54 *
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
61 *
62 * Cache readers (PC-checked by collecting_in_critical())
63 * objc_msgSend*
64 * _cache_getImp
65 * _cache_getMethod
66 *
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
75 *
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
77 * _cache_print
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
81 *
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward_impcache,
90 * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
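/* Informal sketch of the writer-side sequence these rules imply; compare
 * _cache_expand() and _cache_collect() below:
 *
 *   mutex_locker_t lock(cacheUpdateLock);
 *   Cache old = cls->cache;
 *   cls->cache = new_cache;               // readers stop finding the old cache
 *   _cache_collect_free(old, byte_size);  // park it on the garbage list
 *   _cache_collect(false);                // freed only once no thread's PC is
 *                                         // inside a cache-reading routine
 */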
93
94 #if !__OBJC2__
95
96 #include "objc-private.h"
97 #include "objc-cache-old.h"
98 #include "hashtable2.h"
99
100 typedef struct {
101 SEL name; // same layout as struct old_method
102 void *unused;
103 IMP imp; // same layout as struct old_method
104 } cache_entry;
105
106
107 /* When _class_slow_grow is non-zero, any given cache is actually grown
108 * only on the odd-numbered times it becomes full; on the even-numbered
109 * times, it is simply emptied and re-used. When this flag is zero,
110 * caches are grown every time. */
111 static const int _class_slow_grow = 1;
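// (With slow grow enabled, a cache that keeps filling therefore alternates
// between being emptied in place and being doubled; see _cache_expand below.)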
112
113 /* For min cache size: clear_cache=1, slow_grow=1
114 For max cache size: clear_cache=0, slow_grow=0 */
115
116 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
117 enum {
118 INIT_CACHE_SIZE_LOG2 = 2,
119 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
120 };
121
122
123 /* Amount of space required for `count` hash table buckets, knowing that
124 * one entry is embedded in the cache structure itself. */
125 #define TABLE_SIZE(count) ((count - 1) * sizeof(cache_entry *))
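// For example, with 32-bit pointers TABLE_SIZE(4) == 3 * sizeof(cache_entry *)
// == 12 bytes, because struct objc_cache already embeds one bucket.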
126
127
128 #if !TARGET_OS_WIN32
129 # define CACHE_ALLOCATOR
130 #endif
131
132 /* Custom cache allocator parameters.
133 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
134 #define CACHE_ALLOCATOR_MIN 512
135 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
136 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
137 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
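/* Worked numbers, assuming the 32-bit layout this allocator targets
 * (sizeof(struct objc_cache) == 12, sizeof(cache_entry *) == 4):
 *   CACHE_QUANTUM     == 512 + 12 - 4         == 520 bytes
 *   CACHE_REGION_SIZE == (131072 / 520) * 520 == 131040 bytes
 * which matches the "quantum of 520" described in the allocator comments
 * further below. */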
138
139 static uintptr_t cache_allocator_mask_for_size(size_t size)
140 {
141 return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
142 }
143
144 static size_t cache_allocator_size_for_mask(uintptr_t mask)
145 {
146 size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
147 size_t actual = CACHE_QUANTUM;
148 while (actual < requested) actual += CACHE_QUANTUM;
149 return actual;
150 }
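// Example round trip, same 32-bit assumptions as above: a 128-slot cache has
// mask 127; cache_allocator_size_for_mask(127) needs 12 + 127*4 == 520 bytes,
// exactly one CACHE_QUANTUM, and cache_allocator_mask_for_size(520) yields
// (520 - 12) / 4 == 127 again.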
151
152
153 /* Cache instrumentation data. Immediately follows the cache block itself. */
154 #ifdef OBJC_INSTRUMENTED
155 typedef struct
156 {
157 unsigned int hitCount; // cache lookup success tally
158 unsigned int hitProbes; // sum entries checked to hit
159 unsigned int maxHitProbes; // max entries checked to hit
160 unsigned int missCount; // cache lookup no-find tally
161 unsigned int missProbes; // sum entries checked to miss
162 unsigned int maxMissProbes; // max entries checked to miss
163 unsigned int flushCount; // cache flush tally
164 unsigned int flushedEntries; // sum cache entries flushed
165 unsigned int maxFlushedEntries; // max cache entries flushed
166 } CacheInstrumentation;
167
168 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
169 #endif
170
171 /* Cache filling and flushing instrumentation */
172
173 static int totalCacheFills = 0;
174
175 #ifdef OBJC_INSTRUMENTED
176 unsigned int LinearFlushCachesCount = 0;
177 unsigned int LinearFlushCachesVisitedCount = 0;
178 unsigned int MaxLinearFlushCachesVisitedCount = 0;
179 unsigned int NonlinearFlushCachesCount = 0;
180 unsigned int NonlinearFlushCachesClassCount = 0;
181 unsigned int NonlinearFlushCachesVisitedCount = 0;
182 unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
183 unsigned int IdealFlushCachesCount = 0;
184 unsigned int MaxIdealFlushCachesCount = 0;
185 #endif
186
187
188 /***********************************************************************
189 * A static empty cache. All classes initially point at this cache.
190 * When the first message is sent it misses in the cache, and when
191 * the cache is grown it checks for this case and uses malloc rather
192 * than realloc. This avoids the need to check for NULL caches in the
193 * messenger.
194 ***********************************************************************/
195
196 struct objc_cache _objc_empty_cache =
197 {
198 0, // mask
199 0, // occupied
200 { NULL } // buckets
201 };
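// Because mask == 0, every probe index is (hash & 0) == 0 and buckets[0] is
// NULL, so lookups through the empty cache always miss and fall into the
// normal method-lookup path without any special NULL-cache test.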
202 #ifdef OBJC_INSTRUMENTED
203 CacheInstrumentation emptyCacheInstrumentation = {0};
204 #endif
205
206
207 /* Local prototypes */
208
209 static bool _cache_isEmpty(Cache cache);
210 static Cache _cache_malloc(uintptr_t slotCount);
211 static Cache _cache_create(Class cls);
212 static Cache _cache_expand(Class cls);
213
214 static int _collecting_in_critical(void);
215 static void _garbage_make_room(void);
216 static void _cache_collect_free(void *data, size_t size);
217
218 #if defined(CACHE_ALLOCATOR)
219 static bool cache_allocator_is_block(void *block);
220 static Cache cache_allocator_calloc(size_t size);
221 static void cache_allocator_free(void *block);
222 #endif
223
224 /***********************************************************************
225 * Cache statistics for OBJC_PRINT_CACHE_SETUP
226 **********************************************************************/
227 static unsigned int cache_counts[16];
228 static size_t cache_allocations;
229 static size_t cache_collections;
230 static size_t cache_allocator_regions;
231
232 static size_t log2u(size_t x)
233 {
234 unsigned int log;
235
236 log = 0;
237 while (x >>= 1)
238 log += 1;
239
240 return log;
241 }
242
243
244 /***********************************************************************
245 * _cache_isEmpty.
246 * Returns YES if the given cache is some empty cache.
247 * Empty caches should never be allocated on the heap.
248 **********************************************************************/
249 static bool _cache_isEmpty(Cache cache)
250 {
251 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
252 }
253
254
255 /***********************************************************************
256 * _cache_malloc.
257 *
258 * Called from _cache_create() and cache_expand()
259 * Cache locks: cacheUpdateLock must be held by the caller.
260 **********************************************************************/
261 static Cache _cache_malloc(uintptr_t slotCount)
262 {
263 Cache new_cache;
264 size_t size;
265
266 cacheUpdateLock.assertLocked();
267
268 // Allocate table (why not check for failure?)
269 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
270 #if defined(OBJC_INSTRUMENTED)
271 // Custom cache allocator can't handle instrumentation.
272 size += sizeof(CacheInstrumentation);
273 new_cache = (Cache)calloc(size, 1);
274 new_cache->mask = slotCount - 1;
275 #elif !defined(CACHE_ALLOCATOR)
276 // fixme cache allocator implementation isn't 64-bit clean
277 new_cache = (Cache)calloc(size, 1);
278 new_cache->mask = (unsigned int)(slotCount - 1);
279 #else
280 if (size < CACHE_ALLOCATOR_MIN) {
281 new_cache = (Cache)calloc(size, 1);
282 new_cache->mask = slotCount - 1;
283 // occupied and buckets and instrumentation are all zero
284 } else {
285 new_cache = cache_allocator_calloc(size);
286 // mask is already set
287 // occupied and buckets and instrumentation are all zero
288 }
289 #endif
290
291 if (PrintCaches) {
292 size_t bucket = log2u(slotCount);
293 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
294 cache_counts[bucket]++;
295 }
296 cache_allocations++;
297 }
298
299 return new_cache;
300 }
301
302 /***********************************************************************
303 * _cache_free_block.
304 *
305 * Called from _cache_free() and _cache_collect_free().
306 * block may be a cache or a forward:: entry.
307 * If block is a cache, forward:: entries it points to will NOT be freed.
308 * Cache locks: cacheUpdateLock must be held by the caller.
309 **********************************************************************/
310 static inline int isPowerOf2(unsigned long l) { return 1 == __builtin_popcountl(l); }
311 static void _cache_free_block(void *block)
312 {
313 cacheUpdateLock.assertLocked();
314
315 #if !TARGET_OS_WIN32
316 if (PrintCaches) {
317 Cache cache = (Cache)block;
318 size_t slotCount = cache->mask + 1;
319 if (isPowerOf2(slotCount)) {
320 size_t bucket = log2u(slotCount);
321 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
322 cache_counts[bucket]--;
323 }
324 }
325 }
326 #endif
327
328 #if defined(CACHE_ALLOCATOR)
329 if (cache_allocator_is_block(block)) {
330 cache_allocator_free(block);
331 } else
332 #endif
333 {
334 free(block);
335 }
336 }
337
338
339 /***********************************************************************
340 * _cache_free.
341 *
342 * Called from _objc_remove_classes_in_image().
343 * forward:: entries in the cache ARE freed.
344 * Cache locks: cacheUpdateLock must NOT be held by the caller.
345 **********************************************************************/
346 void _cache_free(Cache cache)
347 {
348 unsigned int i;
349
350 mutex_locker_t lock(cacheUpdateLock);
351
352 for (i = 0; i < cache->mask + 1; i++) {
353 cache_entry *entry = (cache_entry *)cache->buckets[i];
354 if (entry && entry->imp == _objc_msgForward_impcache) {
355 _cache_free_block(entry);
356 }
357 }
358
359 _cache_free_block(cache);
360 }
361
362
363 /***********************************************************************
364 * _cache_create.
365 *
366 * Called from _cache_expand().
367 * Cache locks: cacheUpdateLock must be held by the caller.
368 **********************************************************************/
369 static Cache _cache_create(Class cls)
370 {
371 Cache new_cache;
372
373 cacheUpdateLock.assertLocked();
374
375 // Allocate new cache block
376 new_cache = _cache_malloc(INIT_CACHE_SIZE);
377
378 // Install the cache
379 cls->cache = new_cache;
380
381 // Clear the grow flag so that we will re-use the current storage,
382 // rather than actually grow the cache, when expanding the cache
383 // for the first time
384 if (_class_slow_grow) {
385 cls->setShouldGrowCache(false);
386 }
387
388 // Return our creation
389 return new_cache;
390 }
391
392
393 /***********************************************************************
394 * _cache_expand.
395 *
396 * Called from _cache_fill ()
397 * Cache locks: cacheUpdateLock must be held by the caller.
398 **********************************************************************/
399 static Cache _cache_expand(Class cls)
400 {
401 Cache old_cache;
402 Cache new_cache;
403 uintptr_t slotCount;
404 uintptr_t index;
405
406 cacheUpdateLock.assertLocked();
407
408 // First growth goes from empty cache to a real one
409 old_cache = cls->cache;
410 if (_cache_isEmpty(old_cache))
411 return _cache_create (cls);
412
413 if (_class_slow_grow) {
414 // Cache grows every other time only.
415 if (cls->shouldGrowCache()) {
416 // Grow the cache this time. Don't grow next time.
417 cls->setShouldGrowCache(false);
418 }
419 else {
420 // Reuse the current cache storage this time. Do grow next time.
421 cls->setShouldGrowCache(true);
422
423 // Clear the valid-entry counter
424 old_cache->occupied = 0;
425
426 // Invalidate all the cache entries
427 for (index = 0; index < old_cache->mask + 1; index += 1)
428 {
429 // Remember what this entry was, so we can possibly
430 // deallocate it after the bucket has been invalidated
431 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
432
433 // Skip invalid entry
434 if (!oldEntry)
435 continue;
436
437 // Invalidate this entry
438 old_cache->buckets[index] = NULL;
439
440 // Deallocate "forward::" entry
441 if (oldEntry->imp == _objc_msgForward_impcache) {
442 _cache_collect_free (oldEntry, sizeof(cache_entry));
443 }
444 }
445
446 // Return the same old cache, freshly emptied
447 return old_cache;
448 }
449 }
450
451 // Double the cache size
452 slotCount = (old_cache->mask + 1) << 1;
453
454 new_cache = _cache_malloc(slotCount);
455
456 #ifdef OBJC_INSTRUMENTED
457 // Propagate the instrumentation data
458 {
459 CacheInstrumentation *oldCacheData;
460 CacheInstrumentation *newCacheData;
461
462 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
463 newCacheData = CACHE_INSTRUMENTATION(new_cache);
464 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
465 }
466 #endif
467
468 // Deallocate "forward::" entries from the old cache
469 for (index = 0; index < old_cache->mask + 1; index++) {
470 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
471 if (entry && entry->imp == _objc_msgForward_impcache) {
472 _cache_collect_free (entry, sizeof(cache_entry));
473 }
474 }
475
476 // Install new cache
477 cls->cache = new_cache;
478
479 // Deallocate old cache, try freeing all the garbage
480 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *));
481 _cache_collect(false);
482
483 return new_cache;
484 }
485
486
487 /***********************************************************************
488 * _cache_fill. Add the specified method to the specified class' cache.
489 * Returns NO if the cache entry wasn't added: cache was busy,
490 * class is still being initialized, new entry is a duplicate.
491 *
492 * Called only from _class_lookupMethodAndLoadCache and
493 * class_respondsToMethod and _cache_addForwardEntry.
494 *
495 * Cache locks: cacheUpdateLock must not be held.
496 **********************************************************************/
497 bool _cache_fill(Class cls, Method smt, SEL sel)
498 {
499 uintptr_t newOccupied;
500 uintptr_t index;
501 cache_entry **buckets;
502 cache_entry *entry;
503 Cache cache;
504
505 cacheUpdateLock.assertUnlocked();
506
507 // Never cache before +initialize is done
508 if (!cls->isInitialized()) {
509 return NO;
510 }
511
512 // Keep tally of cache additions
513 totalCacheFills += 1;
514
515 mutex_locker_t lock(cacheUpdateLock);
516
517 entry = (cache_entry *)smt;
518
519 cache = cls->cache;
520
521 // Make sure the entry wasn't added to the cache by some other thread
522 // before we grabbed the cacheUpdateLock.
523 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
524 // return forward:: entries.
525 if (_cache_getImp(cls, sel)) {
526 return NO; // entry is already cached, didn't add new one
527 }
528
529 // Use the cache as-is if it is less than 3/4 full
530 newOccupied = cache->occupied + 1;
531 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
532 // Cache is less than 3/4 full.
533 cache->occupied = (unsigned int)newOccupied;
534 } else {
535 // Cache is too full. Expand it.
536 cache = _cache_expand (cls);
537
538 // Account for the addition
539 cache->occupied += 1;
540 }
541
542 // Scan for the first unused slot and insert there.
543 // There is guaranteed to be an empty slot because the
544 // minimum size is 4 and we resized at 3/4 full.
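    // (For example, a 4-slot cache expands when a 4th entry arrives, since
    // 4*4 > 3*4; so at most 3 of its 4 slots are ever occupied and the probe
    // loop below is guaranteed to hit a NULL slot.)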
545 buckets = (cache_entry **)cache->buckets;
546 for (index = CACHE_HASH(sel, cache->mask);
547 buckets[index] != NULL;
548 index = (index+1) & cache->mask)
549 {
550 // empty
551 }
552 buckets[index] = entry;
553
554 return YES; // successfully added new cache entry
555 }
556
557
558 /***********************************************************************
559 * _cache_addForwardEntry
560 * Add a forward:: entry for the given selector to cls's method cache.
561 * Does nothing if the cache addition fails for any reason.
562 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
563 * Cache locks: cacheUpdateLock must not be held.
564 **********************************************************************/
565 void _cache_addForwardEntry(Class cls, SEL sel)
566 {
567 cache_entry *smt;
568
569 smt = (cache_entry *)malloc(sizeof(cache_entry));
570 smt->name = sel;
571 smt->imp = _objc_msgForward_impcache;
572 if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
573 // Entry not added to cache. Don't leak the method struct.
574 free(smt);
575 }
576 }
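// Note: forward:: entries like this one are allocated by the cache code itself
// and must be freed by it (hence the imp == _objc_msgForward_impcache checks
// throughout this file); ordinary entries are Method structs owned by a
// class's method lists, and the ignored-selector entries below are shared.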
577
578
579 /***********************************************************************
580 * _cache_addIgnoredEntry
581 * Add an entry for the ignored selector to cls's method cache.
582 * Does nothing if the cache addition fails for any reason.
583 * Returns the ignored IMP.
584 * Cache locks: cacheUpdateLock must not be held.
585 **********************************************************************/
586 #if SUPPORT_GC && !SUPPORT_IGNORED_SELECTOR_CONSTANT
587 static cache_entry *alloc_ignored_entries(void)
588 {
589 cache_entry *e = (cache_entry *)malloc(5 * sizeof(cache_entry));
590 e[0] = (cache_entry){ @selector(retain), 0,(IMP)&_objc_ignored_method};
591 e[1] = (cache_entry){ @selector(release), 0,(IMP)&_objc_ignored_method};
592 e[2] = (cache_entry){ @selector(autorelease),0,(IMP)&_objc_ignored_method};
593 e[3] = (cache_entry){ @selector(retainCount),0,(IMP)&_objc_ignored_method};
594 e[4] = (cache_entry){ @selector(dealloc), 0,(IMP)&_objc_ignored_method};
595 return e;
596 }
597 #endif
598
599 IMP _cache_addIgnoredEntry(Class cls, SEL sel)
600 {
601 cache_entry *entryp = NULL;
602
603 #if !SUPPORT_GC
604 _objc_fatal("selector ignored with GC off");
605 #elif SUPPORT_IGNORED_SELECTOR_CONSTANT
606 static cache_entry entry = { (SEL)kIgnore, 0, (IMP)&_objc_ignored_method };
607 entryp = &entry;
608 assert(sel == (SEL)kIgnore);
609 #else
610 // hack
611 int i;
612 static cache_entry *entries;
613 INIT_ONCE_PTR(entries, alloc_ignored_entries(), free(v));
614
615 assert(ignoreSelector(sel));
616 for (i = 0; i < 5; i++) {
617 if (sel == entries[i].name) {
618 entryp = &entries[i];
619 break;
620 }
621 }
622 if (!entryp) _objc_fatal("selector %s (%p) is not ignored",
623 sel_getName(sel), sel);
624 #endif
625
626 _cache_fill(cls, (Method)entryp, sel);
627 return entryp->imp;
628 }
629
630
631 /***********************************************************************
632 * _cache_flush. Invalidate all valid entries in the given class' cache.
633 *
634 * Called from flush_caches() and _cache_fill()
635 * Cache locks: cacheUpdateLock must be held by the caller.
636 **********************************************************************/
637 void _cache_flush(Class cls)
638 {
639 Cache cache;
640 unsigned int index;
641
642 cacheUpdateLock.assertLocked();
643
644 // Locate cache. Ignore unused cache.
645 cache = cls->cache;
646 if (_cache_isEmpty(cache)) return;
647
648 #ifdef OBJC_INSTRUMENTED
649 {
650 CacheInstrumentation *cacheData;
651
652 // Tally this flush
653 cacheData = CACHE_INSTRUMENTATION(cache);
654 cacheData->flushCount += 1;
655 cacheData->flushedEntries += cache->occupied;
656 if (cache->occupied > cacheData->maxFlushedEntries)
657 cacheData->maxFlushedEntries = cache->occupied;
658 }
659 #endif
660
661 // Traverse the cache
662 for (index = 0; index <= cache->mask; index += 1)
663 {
664 // Remember what this entry was, so we can possibly
665 // deallocate it after the bucket has been invalidated
666 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
667
668 // Invalidate this entry
669 cache->buckets[index] = NULL;
670
671 // Deallocate "forward::" entry
672 if (oldEntry && oldEntry->imp == _objc_msgForward_impcache)
673 _cache_collect_free (oldEntry, sizeof(cache_entry));
674 }
675
676 // Clear the valid-entry counter
677 cache->occupied = 0;
678 }
679
680
681 /***********************************************************************
682 * flush_cache. Flushes the instance method cache for class cls only.
683 * Use flush_caches() if cls might have in-use subclasses.
684 **********************************************************************/
685 void flush_cache(Class cls)
686 {
687 if (cls) {
688 mutex_locker_t lock(cacheUpdateLock);
689 _cache_flush(cls);
690 }
691 }
692
693
694 /***********************************************************************
695 * cache collection.
696 **********************************************************************/
697
698 #if !TARGET_OS_WIN32
699
700 // A sentinel (magic value) to report bad thread_get_state status.
701 // Must not be a valid PC.
702 // Must not be zero - thread_get_state() on a new thread returns PC == 0.
703 #define PC_SENTINEL 1
704
705 // UNIX03 compliance hack (4508809)
706 #if !__DARWIN_UNIX03
707 #define __srr0 srr0
708 #define __eip eip
709 #endif
710
711 static uintptr_t _get_pc_for_thread(thread_t thread)
712 #if defined(__i386__)
713 {
714 i386_thread_state_t state;
715 unsigned int count = i386_THREAD_STATE_COUNT;
716 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
717 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
718 }
719 #elif defined(__x86_64__)
720 {
721 x86_thread_state64_t state;
722 unsigned int count = x86_THREAD_STATE64_COUNT;
723 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
724 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
725 }
726 #elif defined(__arm__)
727 {
728 arm_thread_state_t state;
729 unsigned int count = ARM_THREAD_STATE_COUNT;
730 kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
731 return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
732 }
733 #else
734 {
735 #error _get_pc_for_thread () not implemented for this architecture
736 }
737 #endif
738
739 #endif
740
741 /***********************************************************************
742 * _collecting_in_critical.
743 * Returns TRUE if some thread is currently executing a cache-reading
744 * function. Collection of cache garbage is not allowed when a cache-
745 * reading function is in progress because it might still be using
746 * the garbage memory.
747 **********************************************************************/
748 OBJC_EXPORT uintptr_t objc_entryPoints[];
749 OBJC_EXPORT uintptr_t objc_exitPoints[];
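// These arrays are expected to come from the assembly messenger sources: each
// objc_entryPoints[i]..objc_exitPoints[i] pair brackets one of the
// cache-reading routines listed in the locking comment at the top of this
// file, and the lists are terminated by a 0 entry (see the loop below).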
750
751 static int _collecting_in_critical(void)
752 {
753 #if TARGET_OS_WIN32
754 return TRUE;
755 #else
756 thread_act_port_array_t threads;
757 unsigned number;
758 unsigned count;
759 kern_return_t ret;
760 int result;
761
762 mach_port_t mythread = pthread_mach_thread_np(pthread_self());
763
764 // Get a list of all the threads in the current task
765 ret = task_threads (mach_task_self (), &threads, &number);
766 if (ret != KERN_SUCCESS)
767 {
768 _objc_fatal("task_threads failed (result %d)\n", ret);
769 }
770
771 // Check whether any thread is in the cache lookup code
772 result = FALSE;
773 for (count = 0; count < number; count++)
774 {
775 int region;
776 uintptr_t pc;
777
778 // Don't bother checking ourselves
779 if (threads[count] == mythread)
780 continue;
781
782 // Find out where thread is executing
783 pc = _get_pc_for_thread (threads[count]);
784
785 // Check for bad status, and if so, assume the worst (can't collect)
786 if (pc == PC_SENTINEL)
787 {
788 result = TRUE;
789 goto done;
790 }
791
792 // Check whether it is in the cache lookup code
793 for (region = 0; objc_entryPoints[region] != 0; region++)
794 {
795 if ((pc >= objc_entryPoints[region]) &&
796 (pc <= objc_exitPoints[region]))
797 {
798 result = TRUE;
799 goto done;
800 }
801 }
802 }
803
804 done:
805 // Deallocate the port rights for the threads
806 for (count = 0; count < number; count++) {
807 mach_port_deallocate(mach_task_self (), threads[count]);
808 }
809
810 // Deallocate the thread list
811 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
812
813 // Return our finding
814 return result;
815 #endif
816 }
817
818
819 /***********************************************************************
820 * _garbage_make_room. Ensure that there is enough room for at least
821 * one more ref in the garbage.
822 **********************************************************************/
823
824 // amount of memory represented by all refs in the garbage
825 static size_t garbage_byte_size = 0;
826
827 // do not empty the garbage until garbage_byte_size gets at least this big
828 static size_t garbage_threshold = 1024;
829
830 // table of refs to free
831 static void **garbage_refs = 0;
832
833 // current number of refs in garbage_refs
834 static size_t garbage_count = 0;
835
836 // capacity of current garbage_refs
837 static size_t garbage_max = 0;
838
839 // capacity of initial garbage_refs
840 enum {
841 INIT_GARBAGE_COUNT = 128
842 };
843
844 static void _garbage_make_room(void)
845 {
846 static int first = 1;
847
848 // Create the collection table the first time it is needed
849 if (first)
850 {
851 first = 0;
852 garbage_refs = (void**)
853 malloc(INIT_GARBAGE_COUNT * sizeof(void *));
854 garbage_max = INIT_GARBAGE_COUNT;
855 }
856
857 // Double the table if it is full
858 else if (garbage_count == garbage_max)
859 {
860 garbage_refs = (void**)
861 realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
862 garbage_max *= 2;
863 }
864 }
865
866
867 /***********************************************************************
868 * _cache_collect_free. Add the specified malloc'd memory to the list
869 * of garbage to free at some later point.
870 * size is used for the collection threshold. It does not have to be
871 * precisely the block's size.
872 * Cache locks: cacheUpdateLock must be held by the caller.
873 **********************************************************************/
874 static void _cache_collect_free(void *data, size_t size)
875 {
876 cacheUpdateLock.assertLocked();
877
878 _garbage_make_room ();
879 garbage_byte_size += size;
880 garbage_refs[garbage_count++] = data;
881 }
882
883
884 /***********************************************************************
885 * _cache_collect. Try to free accumulated dead caches.
886 * collectALot tries harder to free memory.
887 * Cache locks: cacheUpdateLock must be held by the caller.
888 **********************************************************************/
889 void _cache_collect(bool collectALot)
890 {
891 cacheUpdateLock.assertLocked();
892
893 // Done if the garbage is not full
894 if (garbage_byte_size < garbage_threshold && !collectALot) {
895 return;
896 }
897
898 // Synchronize collection with objc_msgSend and other cache readers
899 if (!collectALot) {
900 if (_collecting_in_critical ()) {
901 // objc_msgSend (or other cache reader) is currently looking in
902 // the cache and might still be using some garbage.
903 if (PrintCaches) {
904 _objc_inform ("CACHES: not collecting; "
905 "objc_msgSend in progress");
906 }
907 return;
908 }
909 }
910 else {
911 // No excuses.
912 while (_collecting_in_critical())
913 ;
914 }
915
916 // No cache readers in progress - garbage is now deletable
917
918 // Log our progress
919 if (PrintCaches) {
920 cache_collections++;
921 _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
922 }
923
924 // Dispose all refs now in the garbage
925 while (garbage_count--) {
926 _cache_free_block(garbage_refs[garbage_count]);
927 }
928
929 // Clear the garbage count and total size indicator
930 garbage_count = 0;
931 garbage_byte_size = 0;
932
933 if (PrintCaches) {
934 size_t i;
935 size_t total = 0;
936 size_t ideal_total = 0;
937 size_t malloc_total = 0;
938 size_t local_total = 0;
939
940 for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
941 int count = cache_counts[i];
942 int slots = 1 << i;
943 size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
944 size_t ideal = size;
945 #if TARGET_OS_WIN32
946 size_t malloc = size;
947 #else
948 size_t malloc = malloc_good_size(size);
949 #endif
950 size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
951
952 if (!count) continue;
953
954 _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
955
956 total += count;
957 ideal_total += ideal*count;
958 malloc_total += malloc*count;
959 local_total += local*count;
960 }
961
962 _objc_inform("CACHES: total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
963 }
964 }
965
966
967
968
969
970 #if defined(CACHE_ALLOCATOR)
971
972 /***********************************************************************
973 * Custom method cache allocator.
974 * Method cache block sizes are 2^slots+2 words, which is a pessimal
975 * case for the system allocator. It wastes 504 bytes per cache block
976 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
977 * To save memory, the custom cache allocator below is used.
978 *
979 * The cache allocator uses 128 KB allocation regions. Few processes will
980 * require a second region. Within a region, allocation is address-ordered
981 * first fit.
982 *
983 * The cache allocator uses a quantum of 520.
984 * Cache block ideal sizes: 520, 1032, 2056, 4104
985 * Cache allocator sizes: 520, 1040, 2080, 4160
986 *
987 * Because all blocks are known to be genuine method caches, the ordinary
988 * cache->mask and cache->occupied fields are used as block headers.
989 * No out-of-band headers are maintained. The number of blocks will
990 * almost always be fewer than 200, so for simplicity there is no free
991 * list or other optimization.
992 *
993 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
994 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
995 *
996 * No cache allocator functions take any locks. Instead, the caller
997 * must hold the cacheUpdateLock.
998 *
999 * fixme with 128 KB regions and 520 B min block size, an allocation
1000 * bitmap would be only 32 bytes - better than free list?
1001 **********************************************************************/
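/* The "ideal" vs. allocator sizes above follow from the macros at the top of
 * this file (same 32-bit assumptions as before): a 128-slot cache ideally
 * needs 12 + 127*4 == 520 bytes and gets 1 quantum (520); 256 slots need
 * 12 + 255*4 == 1032 and get 2 quanta (1040); 512 slots need 2056 and get
 * 4 quanta (2080); 1024 slots need 4104 and get 8 quanta (4160). */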
1002
1003 typedef struct cache_allocator_block {
1004 uintptr_t size;
1005 uintptr_t state;
1006 struct cache_allocator_block *nextFree;
1007 } cache_allocator_block;
1008
1009 typedef struct cache_allocator_region {
1010 cache_allocator_block *start;
1011 cache_allocator_block *end; // first non-block address
1012 cache_allocator_block *freeList;
1013 struct cache_allocator_region *next;
1014 } cache_allocator_region;
1015
1016 static cache_allocator_region *cacheRegion = NULL;
1017
1018
1019 /***********************************************************************
1020 * cache_allocator_add_region
1021 * Allocates and returns a new region that can hold at least size
1022 * bytes of large method caches.
1023 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
1024 * with a minimum of CACHE_REGION_SIZE.
1025 * The new region is lowest-priority for new allocations. Callers that
1026 * know the other regions are already full should allocate directly
1027 * into the returned region.
1028 **********************************************************************/
1029 static cache_allocator_region *cache_allocator_add_region(size_t size)
1030 {
1031 vm_address_t addr;
1032 cache_allocator_block *b;
1033 cache_allocator_region **rgnP;
1034 cache_allocator_region *newRegion = (cache_allocator_region *)
1035 calloc(1, sizeof(cache_allocator_region));
1036
1037 // Round size up to quantum boundary, and apply the minimum size.
1038 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
1039 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
1040
1041 // Allocate the region
1042 addr = (vm_address_t)calloc(size, 1);
1043 newRegion->start = (cache_allocator_block *)addr;
1044 newRegion->end = (cache_allocator_block *)(addr + size);
1045
1046 // Mark the first block: free and covers the entire region
1047 b = newRegion->start;
1048 b->size = size;
1049 b->state = (uintptr_t)-1;
1050 b->nextFree = NULL;
1051 newRegion->freeList = b;
1052
1053 // Add to end of the linked list of regions.
1054 // Other regions should be re-used before this one is touched.
1055 newRegion->next = NULL;
1056 rgnP = &cacheRegion;
1057 while (*rgnP) {
1058 rgnP = &(**rgnP).next;
1059 }
1060 *rgnP = newRegion;
1061
1062 cache_allocator_regions++;
1063
1064 return newRegion;
1065 }
1066
1067
1068 /***********************************************************************
1069 * cache_allocator_coalesce
1070 * Attempts to coalesce a free block with the single free block following
1071 * it in the free list, if any.
1072 **********************************************************************/
1073 static void cache_allocator_coalesce(cache_allocator_block *block)
1074 {
1075 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1076 block->size += block->nextFree->size;
1077 block->nextFree = block->nextFree->nextFree;
1078 }
1079 }
1080
1081
1082 /***********************************************************************
1083 * cache_region_calloc
1084 * Attempt to allocate a size-byte block in the given region.
1085 * Allocation is first-fit. The free list is already fully coalesced.
1086 * Returns NULL if there is not enough room in the region for the block.
1087 **********************************************************************/
1088 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1089 {
1090 cache_allocator_block **blockP;
1091 uintptr_t mask;
1092
1093 // Save mask for allocated block, then round size
1094 // up to CACHE_QUANTUM boundary
1095 mask = cache_allocator_mask_for_size(size);
1096 size = cache_allocator_size_for_mask(mask);
1097
1098 // Search the free list for a sufficiently large free block.
1099
1100 for (blockP = &rgn->freeList;
1101 *blockP != NULL;
1102 blockP = &(**blockP).nextFree)
1103 {
1104 cache_allocator_block *block = *blockP;
1105 if (block->size < size) continue; // not big enough
1106
1107 // block is now big enough. Allocate from it.
1108
1109 // Slice off unneeded fragment of block, if any,
1110 // and reconnect the free list around block.
1111 if (block->size - size >= CACHE_QUANTUM) {
1112 cache_allocator_block *leftover =
1113 (cache_allocator_block *)(size + (uintptr_t)block);
1114 leftover->size = block->size - size;
1115 leftover->state = (uintptr_t)-1;
1116 leftover->nextFree = block->nextFree;
1117 *blockP = leftover;
1118 } else {
1119 *blockP = block->nextFree;
1120 }
1121
1122 // block is now exactly the right size.
1123
1124 bzero(block, size);
1125 block->size = mask; // Cache->mask
1126 block->state = 0; // Cache->occupied
1127
1128 return block;
1129 }
1130
1131 // No room in this region.
1132 return NULL;
1133 }
1134
1135
1136 /***********************************************************************
1137 * cache_allocator_calloc
1138 * Custom allocator for large method caches (128+ slots)
1139 * The returned cache block already has cache->mask set.
1140 * cache->occupied and the cache contents are zero.
1141 * Cache locks: cacheUpdateLock must be held by the caller
1142 **********************************************************************/
1143 static Cache cache_allocator_calloc(size_t size)
1144 {
1145 cache_allocator_region *rgn;
1146
1147 cacheUpdateLock.assertLocked();
1148
1149 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1150 void *p = cache_region_calloc(rgn, size);
1151 if (p) {
1152 return (Cache)p;
1153 }
1154 }
1155
1156 // No regions or all regions full - make a region and try one more time
1157 // In the unlikely case of a cache over 256KB, it will get its own region.
1158 return (Cache)cache_region_calloc(cache_allocator_add_region(size), size);
1159 }
1160
1161
1162 /***********************************************************************
1163 * cache_allocator_region_for_block
1164 * Returns the cache allocator region that ptr points into, or NULL.
1165 **********************************************************************/
1166 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1167 {
1168 cache_allocator_region *rgn;
1169 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1170 if (block >= rgn->start && block < rgn->end) return rgn;
1171 }
1172 return NULL;
1173 }
1174
1175
1176 /***********************************************************************
1177 * cache_allocator_is_block
1178 * If ptr is a live block from the cache allocator, return YES
1179 * If ptr is a block from some other allocator, return NO.
1180 * If ptr is a dead block from the cache allocator, result is undefined.
1181 * Cache locks: cacheUpdateLock must be held by the caller
1182 **********************************************************************/
1183 static bool cache_allocator_is_block(void *ptr)
1184 {
1185 cacheUpdateLock.assertLocked();
1186 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1187 }
1188
1189 /***********************************************************************
1190 * cache_allocator_free
1191 * Frees a block allocated by the cache allocator.
1192 * Cache locks: cacheUpdateLock must be held by the caller.
1193 **********************************************************************/
1194 static void cache_allocator_free(void *ptr)
1195 {
1196 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1197 cache_allocator_block *cur;
1198 cache_allocator_region *rgn;
1199
1200 cacheUpdateLock.assertLocked();
1201
1202 if (! (rgn = cache_allocator_region_for_block(dead))) {
1203 // free of non-pointer
1204 _objc_inform("cache_allocator_free of non-pointer %p", dead);
1205 return;
1206 }
1207
1208 dead->size = cache_allocator_size_for_mask(dead->size);
1209 dead->state = (uintptr_t)-1;
1210
1211 if (!rgn->freeList || rgn->freeList > dead) {
1212 // dead block belongs at front of free list
1213 dead->nextFree = rgn->freeList;
1214 rgn->freeList = dead;
1215 cache_allocator_coalesce(dead);
1216 return;
1217 }
1218
1219 // dead block belongs in the middle or end of free list
1220 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1221 cache_allocator_block *ahead = cur->nextFree;
1222
1223 if (!ahead || ahead > dead) {
1224 // cur and ahead straddle dead, OR dead belongs at end of free list
1225 cur->nextFree = dead;
1226 dead->nextFree = ahead;
1227
1228 // coalesce into dead first in case both succeed
1229 cache_allocator_coalesce(dead);
1230 cache_allocator_coalesce(cur);
1231 return;
1232 }
1233 }
1234
1235 // uh-oh
1236 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1237 }
1238
1239 // defined(CACHE_ALLOCATOR)
1240 #endif
1241
1242 /***********************************************************************
1243 * Cache instrumentation and debugging
1244 **********************************************************************/
1245
1246 #ifdef OBJC_INSTRUMENTED
1247 enum {
1248 CACHE_HISTOGRAM_SIZE = 512
1249 };
1250
1251 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1252 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1253 #endif
1254
1255
1256 /***********************************************************************
1257 * _cache_print.
1258 **********************************************************************/
1259 static void _cache_print(Cache cache)
1260 {
1261 uintptr_t index;
1262 uintptr_t count;
1263
1264 count = cache->mask + 1;
1265 for (index = 0; index < count; index += 1) {
1266 cache_entry *entry = (cache_entry *)cache->buckets[index];
1267 if (entry) {
1268 if (entry->imp == _objc_msgForward_impcache)
1269 printf ("does not recognize: \n");
1270 printf ("%s\n", sel_getName(entry->name));
1271 }
1272 }
1273 }
1274
1275
1276 /***********************************************************************
1277 * _class_printMethodCaches.
1278 **********************************************************************/
1279 void _class_printMethodCaches(Class cls)
1280 {
1281 if (_cache_isEmpty(cls->cache)) {
1282 printf("no instance-method cache for class %s\n",cls->nameForLogging());
1283 } else {
1284 printf("instance-method cache for class %s:\n", cls->nameForLogging());
1285 _cache_print(cls->cache);
1286 }
1287
1288 if (_cache_isEmpty(cls->ISA()->cache)) {
1289 printf("no class-method cache for class %s\n", cls->nameForLogging());
1290 } else {
1291 printf ("class-method cache for class %s:\n", cls->nameForLogging());
1292 _cache_print(cls->ISA()->cache);
1293 }
1294 }
1295
1296
1297 #if 0
1298 #warning fixme
1299
1300
1301 /***********************************************************************
1302 * _class_printDuplicateCacheEntries.
1303 **********************************************************************/
1304 void _class_printDuplicateCacheEntries(bool detail)
1305 {
1306 NXHashState state;
1307 Class cls;
1308 unsigned int duplicates;
1309 unsigned int index1;
1310 unsigned int index2;
1311 unsigned int mask;
1312 unsigned int count;
1313 unsigned int isMeta;
1314 Cache cache;
1315
1316
1317 printf ("Checking for duplicate cache entries \n");
1318
1319 // Outermost loop - iterate over all classes
1320 state = NXInitHashState (class_hash);
1321 duplicates = 0;
1322 while (NXNextHashState (class_hash, &state, (void **) &cls))
1323 {
1324 // Control loop - do given class' cache, then its isa's cache
1325 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1326 {
1327 // Select cache of interest and make sure it exists
1328 cache = (isMeta ? cls->ISA() : cls)->cache;
1329 if (_cache_isEmpty(cache))
1330 continue;
1331
1332 // Middle loop - check each entry in the given cache
1333 mask = cache->mask;
1334 count = mask + 1;
1335 for (index1 = 0; index1 < count; index1 += 1)
1336 {
1337 // Skip invalid entry
1338 if (!cache->buckets[index1])
1339 continue;
1340
1341 // Inner loop - check that given entry matches no later entry
1342 for (index2 = index1 + 1; index2 < count; index2 += 1)
1343 {
1344 // Skip invalid entry
1345 if (!cache->buckets[index2])
1346 continue;
1347
1348 // Check for duplication by method name comparison
1349 if (strcmp ((char *) cache->buckets[index1]->name,
1350 (char *) cache->buckets[index2]->name) == 0)
1351 {
1352 if (detail)
1353 printf ("%s %s\n", cls->nameForLogging(), sel_getName(cache->buckets[index1]->name));
1354 duplicates += 1;
1355 break;
1356 }
1357 }
1358 }
1359 }
1360 }
1361
1362 // Log the findings
1363 printf ("duplicates = %d\n", duplicates);
1364 printf ("total cache fills = %d\n", totalCacheFills);
1365 }
1366
1367
1368 /***********************************************************************
1369 * PrintCacheHeader.
1370 **********************************************************************/
1371 static void PrintCacheHeader(void)
1372 {
1373 #ifdef OBJC_INSTRUMENTED
1374 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1375 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1376 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1377 #else
1378 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1379 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1380 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1381 #endif
1382 }
1383
1384
1385 /***********************************************************************
1386 * PrintCacheInfo.
1387 **********************************************************************/
1388 static void PrintCacheInfo(unsigned int cacheSize,
1389 unsigned int cacheCount,
1390 unsigned int slotsUsed,
1391 float avgUsed, unsigned int maxUsed,
1392 float avgSHit, unsigned int maxSHit,
1393 float avgSMiss, unsigned int maxSMiss
1394 #ifdef OBJC_INSTRUMENTED
1395 , unsigned int totDHits,
1396 float avgDHit,
1397 unsigned int maxDHit,
1398 unsigned int totDMisses,
1399 float avgDMiss,
1400 unsigned int maxDMiss,
1401 unsigned int totDFlsh,
1402 float avgDFlsh,
1403 unsigned int maxDFlsh
1404 #endif
1405 )
1406 {
1407 #ifdef OBJC_INSTRUMENTED
1408 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1409 #else
1410 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1411 #endif
1412 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1413 #ifdef OBJC_INSTRUMENTED
1414 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1415 #endif
1416 );
1417
1418 }
1419
1420
1421 #ifdef OBJC_INSTRUMENTED
1422 /***********************************************************************
1423 * PrintCacheHistogram. Show the non-zero entries from the specified
1424 * cache histogram.
1425 **********************************************************************/
1426 static void PrintCacheHistogram(char *title,
1427 unsigned int *firstEntry,
1428 unsigned int entryCount)
1429 {
1430 unsigned int index;
1431 unsigned int *thisEntry;
1432
1433 printf ("%s\n", title);
1434 printf (" Probes Tally\n");
1435 printf (" ------ -----\n");
1436 for (index = 0, thisEntry = firstEntry;
1437 index < entryCount;
1438 index += 1, thisEntry += 1)
1439 {
1440 if (*thisEntry == 0)
1441 continue;
1442
1443 printf (" %6d %5d\n", index, *thisEntry);
1444 }
1445 }
1446 #endif
1447
1448
1449 /***********************************************************************
1450 * _class_printMethodCacheStatistics.
1451 **********************************************************************/
1452
1453 #define MAX_LOG2_SIZE 32
1454 #define MAX_CHAIN_SIZE 100
1455
1456 void _class_printMethodCacheStatistics(void)
1457 {
1458 unsigned int isMeta;
1459 unsigned int index;
1460 NXHashState state;
1461 Class cls;
1462 unsigned int totalChain;
1463 unsigned int totalMissChain;
1464 unsigned int maxChain;
1465 unsigned int maxMissChain;
1466 unsigned int classCount;
1467 unsigned int negativeEntryCount;
1468 unsigned int cacheExpandCount;
1469 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1470 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1471 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1472 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1473 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1474 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1475 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1476 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1477 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1478 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1479 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1480 #ifdef OBJC_INSTRUMENTED
1481 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1482 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1483 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1484 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1485 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1486 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1487 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1488 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1489 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1490 #endif
1491
1492 printf ("Printing cache statistics\n");
1493
1494 // Outermost loop - iterate over all classes
1495 state = NXInitHashState (class_hash);
1496 classCount = 0;
1497 negativeEntryCount = 0;
1498 cacheExpandCount = 0;
1499 while (NXNextHashState (class_hash, &state, (void **) &cls))
1500 {
1501 // Tally classes
1502 classCount += 1;
1503
1504 // Control loop - do given class' cache, then its isa's cache
1505 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1506 {
1507 Cache cache;
1508 unsigned int mask;
1509 unsigned int log2Size;
1510 unsigned int entryCount;
1511
1512 // Select cache of interest
1513 cache = (isMeta ? cls->ISA() : cls)->cache;
1514
1515 // Ignore empty cache... should we?
1516 if (_cache_isEmpty(cache))
1517 continue;
1518
1519 // Middle loop - do each entry in the given cache
1520 mask = cache->mask;
1521 entryCount = 0;
1522 totalChain = 0;
1523 totalMissChain = 0;
1524 maxChain = 0;
1525 maxMissChain = 0;
1526 for (index = 0; index < mask + 1; index += 1)
1527 {
1528 cache_entry **buckets;
1529 cache_entry *entry;
1530 unsigned int hash;
1531 unsigned int methodChain;
1532 unsigned int methodMissChain;
1533 unsigned int index2;
1534
1535 // If entry is invalid, the only item of
1536 // interest is that future insert hashes
1537 // to this entry can use it directly.
1538 buckets = (cache_entry **)cache->buckets;
1539 if (!buckets[index])
1540 {
1541 missChainCount[0] += 1;
1542 continue;
1543 }
1544
1545 entry = buckets[index];
1546
1547 // Tally valid entries
1548 entryCount += 1;
1549
1550 // Tally "forward::" entries
1551 if (entry->imp == _objc_msgForward_impcache)
1552 negativeEntryCount += 1;
1553
1554 // Calculate search distance (chain length) for this method
1555 // The chain may wrap around to the beginning of the table.
1556 hash = CACHE_HASH(entry->name, mask);
1557 if (index >= hash) methodChain = index - hash;
1558 else methodChain = (mask+1) + index - hash;
1559
1560 // Tally chains of this length
1561 if (methodChain < MAX_CHAIN_SIZE)
1562 chainCount[methodChain] += 1;
1563
1564 // Keep sum of all chain lengths
1565 totalChain += methodChain;
1566
1567 // Record greatest chain length
1568 if (methodChain > maxChain)
1569 maxChain = methodChain;
1570
1571 // Calculate search distance for miss that hashes here
1572 index2 = index;
1573 while (buckets[index2])
1574 {
1575 index2 += 1;
1576 index2 &= mask;
1577 }
1578 methodMissChain = ((index2 - index) & mask);
1579
1580 // Tally miss chains of this length
1581 if (methodMissChain < MAX_CHAIN_SIZE)
1582 missChainCount[methodMissChain] += 1;
1583
1584 // Keep sum of all miss chain lengths in this class
1585 totalMissChain += methodMissChain;
1586
1587 // Record greatest miss chain length
1588 if (methodMissChain > maxMissChain)
1589 maxMissChain = methodMissChain;
1590 }
1591
1592 // Factor this cache into statistics about caches of the same
1593 // type and size (all caches are a power of two in size)
1594 log2Size = log2u (mask + 1);
1595 cacheCountBySize[isMeta][log2Size] += 1;
1596 totalEntriesBySize[isMeta][log2Size] += entryCount;
1597 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1598 maxEntriesBySize[isMeta][log2Size] = entryCount;
1599 totalChainBySize[isMeta][log2Size] += totalChain;
1600 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1601 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1602 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1603 if (maxChain > maxChainBySize[isMeta][log2Size])
1604 maxChainBySize[isMeta][log2Size] = maxChain;
1605 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1606 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1607 #ifdef OBJC_INSTRUMENTED
1608 {
1609 CacheInstrumentation *cacheData;
1610
1611 cacheData = CACHE_INSTRUMENTATION(cache);
1612 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1613 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1614 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1615 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1616 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1617 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1618 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1619 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1620 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1621 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1622 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1623 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1624 }
1625 #endif
1626 // Caches start with a power of two number of entries, and grow by doubling, so
1627 // we can calculate the number of times this cache has expanded
1628 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1629 }
1630 }
1631
1632 {
1633 unsigned int cacheCountByType[2] = {0};
1634 unsigned int totalCacheCount = 0;
1635 unsigned int totalEntries = 0;
1636 unsigned int maxEntries = 0;
1637 unsigned int totalSlots = 0;
1638 #ifdef OBJC_INSTRUMENTED
1639 unsigned int totalHitCount = 0;
1640 unsigned int totalHitProbes = 0;
1641 unsigned int maxHitProbes = 0;
1642 unsigned int totalMissCount = 0;
1643 unsigned int totalMissProbes = 0;
1644 unsigned int maxMissProbes = 0;
1645 unsigned int totalFlushCount = 0;
1646 unsigned int totalFlushedEntries = 0;
1647 unsigned int maxFlushedEntries = 0;
1648 #endif
1649
1650 totalChain = 0;
1651 maxChain = 0;
1652 totalMissChain = 0;
1653 maxMissChain = 0;
1654
1655 // Sum information over all caches
1656 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1657 {
1658 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1659 {
1660 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1661 totalEntries += totalEntriesBySize[isMeta][index];
1662 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1663 totalChain += totalChainBySize[isMeta][index];
1664 if (maxEntriesBySize[isMeta][index] > maxEntries)
1665 maxEntries = maxEntriesBySize[isMeta][index];
1666 if (maxChainBySize[isMeta][index] > maxChain)
1667 maxChain = maxChainBySize[isMeta][index];
1668 totalMissChain += totalMissChainBySize[isMeta][index];
1669 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1670 maxMissChain = maxMissChainBySize[isMeta][index];
1671 #ifdef OBJC_INSTRUMENTED
1672 totalHitCount += hitCountBySize[isMeta][index];
1673 totalHitProbes += hitProbesBySize[isMeta][index];
1674 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1675 maxHitProbes = maxHitProbesBySize[isMeta][index];
1676 totalMissCount += missCountBySize[isMeta][index];
1677 totalMissProbes += missProbesBySize[isMeta][index];
1678 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1679 maxMissProbes = maxMissProbesBySize[isMeta][index];
1680 totalFlushCount += flushCountBySize[isMeta][index];
1681 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1682 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1683 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1684 #endif
1685 }
1686
1687 totalCacheCount += cacheCountByType[isMeta];
1688 }
1689
1690 // Log our findings
1691 printf ("There are %u classes\n", classCount);
1692
1693 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1694 {
1695 // Number of this type of class
1696 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1697 cacheCountByType[isMeta],
1698 isMeta ? "class" : "instance");
1699
1700 // Print header
1701 PrintCacheHeader ();
1702
1703             // Keep format consistent even if there are no caches of this kind
1704 if (cacheCountByType[isMeta] == 0)
1705 {
1706 printf ("(none)\n");
1707 continue;
1708 }
1709
1710 // Usage information by cache size
1711 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1712 {
1713 unsigned int cacheCount;
1714 unsigned int cacheSlotCount;
1715 unsigned int cacheEntryCount;
1716
1717 // Get number of caches of this type and size
1718 cacheCount = cacheCountBySize[isMeta][index];
1719 if (cacheCount == 0)
1720 continue;
1721
1722 // Get the cache slot count and the total number of valid entries
1723 cacheSlotCount = (1 << index);
1724 cacheEntryCount = totalEntriesBySize[isMeta][index];
1725
1726 // Give the analysis
1727 PrintCacheInfo (cacheSlotCount,
1728 cacheCount,
1729 cacheEntryCount,
1730 (float) cacheEntryCount / (float) cacheCount,
1731 maxEntriesBySize[isMeta][index],
1732 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1733 maxChainBySize[isMeta][index],
1734 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1735 maxMissChainBySize[isMeta][index]
1736 #ifdef OBJC_INSTRUMENTED
1737 , hitCountBySize[isMeta][index],
1738 hitCountBySize[isMeta][index] ?
1739 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1740 maxHitProbesBySize[isMeta][index],
1741 missCountBySize[isMeta][index],
1742 missCountBySize[isMeta][index] ?
1743 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1744 maxMissProbesBySize[isMeta][index],
1745 flushCountBySize[isMeta][index],
1746 flushCountBySize[isMeta][index] ?
1747 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1748 maxFlushedEntriesBySize[isMeta][index]
1749 #endif
1750 );
1751 }
1752 }
1753
1754 // Give overall numbers
1755 printf ("\nCumulative:\n");
1756 PrintCacheHeader ();
1757 PrintCacheInfo (totalSlots,
1758 totalCacheCount,
1759 totalEntries,
1760 (float) totalEntries / (float) totalCacheCount,
1761 maxEntries,
1762 (float) totalChain / (float) totalEntries,
1763 maxChain,
1764 (float) totalMissChain / (float) totalSlots,
1765 maxMissChain
1766 #ifdef OBJC_INSTRUMENTED
1767 , totalHitCount,
1768 totalHitCount ?
1769 (float) totalHitProbes / (float) totalHitCount : 0.0,
1770 maxHitProbes,
1771 totalMissCount,
1772 totalMissCount ?
1773 (float) totalMissProbes / (float) totalMissCount : 0.0,
1774 maxMissProbes,
1775 totalFlushCount,
1776 totalFlushCount ?
1777 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1778 maxFlushedEntries
1779 #endif
1780 );
1781
1782 printf ("\nNumber of \"forward::\" entries: %d\n", negativeEntryCount);
1783 printf ("Number of cache expansions: %d\n", cacheExpandCount);
1784 #ifdef OBJC_INSTRUMENTED
1785 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1786 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1787 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1788 LinearFlushCachesCount,
1789 LinearFlushCachesVisitedCount,
1790 LinearFlushCachesCount ?
1791 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1792 MaxLinearFlushCachesVisitedCount,
1793 LinearFlushCachesVisitedCount,
1794 1.0);
1795 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1796 NonlinearFlushCachesCount,
1797 NonlinearFlushCachesVisitedCount,
1798 NonlinearFlushCachesCount ?
1799 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1800 MaxNonlinearFlushCachesVisitedCount,
1801 NonlinearFlushCachesClassCount,
1802 NonlinearFlushCachesClassCount ?
1803 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1804 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1805 LinearFlushCachesCount + NonlinearFlushCachesCount,
1806 IdealFlushCachesCount,
1807 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1808 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1809 MaxIdealFlushCachesCount,
1810 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1811 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1812 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1813
1814 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1815 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1816 #endif
1817
1818 #if 0
1819 printf ("\nLookup chains:");
1820 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1821 {
1822 if (chainCount[index] != 0)
1823 printf (" %u:%u", index, chainCount[index]);
1824 }
1825
1826 printf ("\nMiss chains:");
1827 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1828 {
1829 if (missChainCount[index] != 0)
1830 printf (" %u:%u", index, missChainCount[index]);
1831 }
1832
1833 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1834 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1835 totalSlots * sizeof(cache_entry *) +
1836 negativeEntryCount * sizeof(cache_entry));
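         // The estimate sums three terms: one objc_cache header per cache (sizeof
         // already includes one inline bucket pointer, hence the subtraction), one
         // bucket pointer per slot across all caches, and one cache_entry per
         // negative "forward::" entry (these appear to be the only bucket payloads
         // the cache allocates itself; ordinary buckets point into the class's
         // method lists).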
1837 #endif
1838 }
1839 }
1840
1841 #endif
1842
1843
1844 // !__OBJC2__
1845 #endif