runtime/objc-cache-old.mm

   1 /*
   2  * Copyright (c) 1999-2007 Apple Inc.  All Rights Reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /***********************************************************************
  25 * objc-cache.m
  26 * Method cache management
  27 * Cache flushing
  28 * Cache garbage collection
  29 * Cache instrumentation
  30 * Dedicated allocator for large caches
  31 **********************************************************************/
  32
  33
  34 /***********************************************************************
  35  * Method cache locking (GrP 2001-1-14)
  36  *
  37  * For speed, objc_msgSend does not acquire any locks when it reads
  38  * method caches. Instead, all cache changes are performed so that any
  39  * objc_msgSend running concurrently with the cache mutator will not
  40  * crash or hang or get an incorrect result from the cache.
  41  *
  42  * When cache memory becomes unused (e.g. the old cache after cache
  43  * expansion), it is not immediately freed, because a concurrent
  44  * objc_msgSend could still be using it. Instead, the memory is
  45  * disconnected from the data structures and placed on a garbage list.
  46  * The memory is now only accessible to instances of objc_msgSend that
  47  * were running when the memory was disconnected; any further calls to
  48  * objc_msgSend will not see the garbage memory because the other data
  49  * structures don't point to it anymore. The collecting_in_critical
  50  * function checks the PC of all threads and returns FALSE when all threads
  51  * are found to be outside objc_msgSend. This means any call to objc_msgSend
  52  * that could have had access to the garbage has finished or moved past the
  53  * cache lookup stage, so it is safe to free the memory.
  54  *
  55  * All functions that modify cache data or structures must acquire the
  56  * cacheUpdateLock to prevent interference from concurrent modifications.
  57  * The function that frees cache garbage must acquire the cacheUpdateLock
  58  * and use collecting_in_critical() to flush out cache readers.
  59  * The cacheUpdateLock is also used to protect the custom allocator used
  60  * for large method cache blocks.
  61  *
  62  * Cache readers (PC-checked by collecting_in_critical())
  63  * objc_msgSend*
  64  * _cache_getImp
  65  * _cache_getMethod
  66  *
  67  * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
  68  * _cache_fill         (acquires lock)
  69  * _cache_expand       (only called from cache_fill)
  70  * _cache_create       (only called from cache_expand)
  71  * bcopy               (only called from instrumented cache_expand)
  72  * flush_caches        (acquires lock)
  73  * _cache_flush        (only called from cache_fill and flush_caches)
  74  * _cache_collect_free (only called from cache_expand and cache_flush)
  75  *
  76  * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
  77  * _cache_print
  78  * _class_printMethodCaches
  79  * _class_printDuplicateCacheEntries
  80  * _class_printMethodCacheStatistics
  81  *
  82  * _class_lookupMethodAndLoadCache is a special case. It may read a
  83  * method triplet out of one cache and store it in another cache. This
  84  * is unsafe if the method triplet is a forward:: entry, because the
  85  * triplet itself could be freed unless _class_lookupMethodAndLoadCache
  86  * were PC-checked or used a lock. Additionally, storing the method
  87  * triplet in both caches would result in double-freeing if both caches
  88  * were flushed or expanded. The solution is for _cache_getMethod to
  89  * ignore all entries whose implementation is _objc_msgForward_impcache,
  90  * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
  91  * unsafely or place it in multiple caches.
  92  ***********************************************************************/
  93
  94 #if !__OBJC2__
  95
  96 #include "objc-private.h"
  97 #include "objc-cache-old.h"
  98 #include "hashtable2.h"
  99
 100 typedef struct {
 101     SEL name;     // same layout as struct old_method
 102     void *unused;
 103     IMP imp;  // same layout as struct old_method
 104 } cache_entry;
 105
 106
 107 /* When _class_slow_grow is non-zero, any given cache is actually grown
 108  * only on the odd-numbered times it becomes full; on the even-numbered
 109  * times, it is simply emptied and re-used.  When this flag is zero,
 110  * caches are grown every time. */
 111 static const int _class_slow_grow = 1;
 112
 113 /* For min cache size: clear_cache=1, slow_grow=1
 114    For max cache size: clear_cache=0, slow_grow=0 */
 115
 116 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
 117 enum {
 118     INIT_CACHE_SIZE_LOG2 = 2,
 119     INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2)
 120 };
 121
 122
 123 /* Amount of space required for `count` hash table buckets, knowing that
 124  * one entry is embedded in the cache structure itself. */
 125 #define TABLE_SIZE(count)  ((count - 1) * sizeof(cache_entry *))
 126
 127
 128 #if !TARGET_OS_WIN32
 129 #   define CACHE_ALLOCATOR
 130 #endif
 131
 132 /* Custom cache allocator parameters.
 133  * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
 134 #define CACHE_ALLOCATOR_MIN 512
 135 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
 136 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
 137 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
 138
 139 static uintptr_t cache_allocator_mask_for_size(size_t size)
 140 {
 141     return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
 142 }
 143
 144 static size_t cache_allocator_size_for_mask(uintptr_t mask)
 145 {
 146     size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
 147     size_t actual = CACHE_QUANTUM;
 148     while (actual < requested) actual += CACHE_QUANTUM;
 149     return actual;
 150 }
 151
 152
 153 /* Cache instrumentation data. Immediately follows the cache block itself. */
 154 #ifdef OBJC_INSTRUMENTED
 155 typedef struct
 156 {
 157     unsigned int hitCount;           // cache lookup success tally
 158     unsigned int hitProbes;          // sum entries checked to hit
 159     unsigned int maxHitProbes;       // max entries checked to hit
 160     unsigned int missCount;          // cache lookup no-find tally
 161     unsigned int missProbes;         // sum entries checked to miss
 162     unsigned int maxMissProbes;      // max entries checked to miss
 163     unsigned int flushCount;         // cache flush tally
 164     unsigned int flushedEntries;     // sum cache entries flushed
 165     unsigned int maxFlushedEntries;  // max cache entries flushed
 166 } CacheInstrumentation;
 167
 168 #define CACHE_INSTRUMENTATION(cache)  (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
 169 #endif
 170
 171 /* Cache filling and flushing instrumentation */
 172
 173 static int totalCacheFills = 0;
 174
 175 #ifdef OBJC_INSTRUMENTED
 176 unsigned int LinearFlushCachesCount              = 0;
 177 unsigned int LinearFlushCachesVisitedCount       = 0;
 178 unsigned int MaxLinearFlushCachesVisitedCount    = 0;
 179 unsigned int NonlinearFlushCachesCount           = 0;
 180 unsigned int NonlinearFlushCachesClassCount      = 0;
 181 unsigned int NonlinearFlushCachesVisitedCount    = 0;
 182 unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
 183 unsigned int IdealFlushCachesCount               = 0;
 184 unsigned int MaxIdealFlushCachesCount            = 0;
 185 #endif
 186
 187
 188 /***********************************************************************
 189 * A static empty cache.  All classes initially point at this cache.
 190 * When the first message is sent it misses in the cache, and when
 191 * the cache is grown it checks for this case and uses malloc rather
 192 * than realloc.  This avoids the need to check for NULL caches in the
 193 * messenger.
 194 ***********************************************************************/
 195
 196 struct objc_cache _objc_empty_cache =
 197 {
 198     0,        // mask
 199     0,        // occupied
 200     { NULL }  // buckets
 201 };
 202 #ifdef OBJC_INSTRUMENTED
 203 CacheInstrumentation emptyCacheInstrumentation = {0};
 204 #endif
 205
 206
 207 /* Local prototypes */
 208
 209 static bool _cache_isEmpty(Cache cache);
 210 static Cache _cache_malloc(uintptr_t slotCount);
 211 static Cache _cache_create(Class cls);
 212 static Cache _cache_expand(Class cls);
 213
 214 static int _collecting_in_critical(void);
 215 static void _garbage_make_room(void);
 216 static void _cache_collect_free(void *data, size_t size);
 217
 218 #if defined(CACHE_ALLOCATOR)
 219 static bool cache_allocator_is_block(void *block);
 220 static Cache cache_allocator_calloc(size_t size);
 221 static void cache_allocator_free(void *block);
 222 #endif
 223
 224 /***********************************************************************
 225 * Cache statistics for OBJC_PRINT_CACHE_SETUP
 226 **********************************************************************/
 227 static unsigned int cache_counts[16];
 228 static size_t cache_allocations;
 229 static size_t cache_collections;
 230 static size_t cache_allocator_regions;
 231
 232 static size_t log2u(size_t x)
 233 {
 234     unsigned int log;
 235
 236     log = 0;
 237     while (x >>= 1)
 238         log += 1;
 239
 240     return log;
 241 }
 242
 243
 244 /***********************************************************************
 245 * _cache_isEmpty.
 246 * Returns YES if the given cache is some empty cache.
 247 * Empty caches should never be allocated on the heap.
 248 **********************************************************************/
 249 static bool _cache_isEmpty(Cache cache)
 250 {
 251     return (cache == NULL  ||  cache == (Cache)&_objc_empty_cache  ||  cache->mask == 0);
 252 }
 253
 254
 255 /***********************************************************************
 256 * _cache_malloc.
 257 *
 258 * Called from _cache_create() and cache_expand()
 259 * Cache locks: cacheUpdateLock must be held by the caller.
 260 **********************************************************************/
 261 static Cache _cache_malloc(uintptr_t slotCount)
 262 {
 263     Cache new_cache;
 264     size_t size;
 265
 266     cacheUpdateLock.assertLocked();
 267
 268     // Allocate table (why not check for failure?)
 269     size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
 270 #if defined(OBJC_INSTRUMENTED)
 271     // Custom cache allocator can't handle instrumentation.
 272     size += sizeof(CacheInstrumentation);
 273     new_cache = calloc(size, 1);
 274     new_cache->mask = slotCount - 1;
 275 #elif !defined(CACHE_ALLOCATOR)
 276     // fixme cache allocator implementation isn't 64-bit clean
 277     new_cache = calloc(size, 1);
 278     new_cache->mask = (unsigned int)(slotCount - 1);
 279 #else
 280     if (size < CACHE_ALLOCATOR_MIN) {
 281         new_cache = (Cache)calloc(size, 1);
 282         new_cache->mask = slotCount - 1;
 283         // occupied and buckets and instrumentation are all zero
 284     } else {
 285         new_cache = cache_allocator_calloc(size);
 286         // mask is already set
 287         // occupied and buckets and instrumentation are all zero
 288     }
 289 #endif
 290
 291     if (PrintCaches) {
 292         size_t bucket = log2u(slotCount);
 293         if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
 294             cache_counts[bucket]++;
 295         }
 296         cache_allocations++;
 297     }
 298
 299     return new_cache;
 300 }
 301
 302 /***********************************************************************
 303 * _cache_free_block.
 304 *
 305 * Called from _cache_free() and _cache_collect_free().
 306 * block may be a cache or a forward:: entry.
 307 * If block is a cache, forward:: entries it points to will NOT be freed.
 308 * Cache locks: cacheUpdateLock must be held by the caller.
 309 **********************************************************************/
 310 static inline int isPowerOf2(unsigned long l) { return 1 == __builtin_popcountl(l); }
 311 static void _cache_free_block(void *block)
 312 {
 313     cacheUpdateLock.assertLocked();
 314
 315 #if !TARGET_OS_WIN32
 316     if (PrintCaches) {
 317         Cache cache = (Cache)block;
 318         size_t slotCount = cache->mask + 1;
 319         if (isPowerOf2(slotCount)) {
 320             size_t bucket = log2u(slotCount);
 321             if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
 322                 cache_counts[bucket]--;
 323             }
 324         }
 325     }
 326 #endif
 327
 328 #if defined(CACHE_ALLOCATOR)
 329     if (cache_allocator_is_block(block)) {
 330         cache_allocator_free(block);
 331     } else
 332 #endif
 333     {
 334         free(block);
 335     }
 336 }
 337
 338
 339 /***********************************************************************
 340 * _cache_free.
 341 *
 342 * Called from _objc_remove_classes_in_image().
 343 * forward:: entries in the cache ARE freed.
 344 * Cache locks: cacheUpdateLock must NOT be held by the caller.
 345 **********************************************************************/
 346 void _cache_free(Cache cache)
 347 {
 348     unsigned int i;
 349
 350     mutex_locker_t lock(cacheUpdateLock);
 351
 352     for (i = 0; i < cache->mask + 1; i++) {
 353         cache_entry *entry = (cache_entry *)cache->buckets[i];
 354         if (entry  &&  entry->imp == _objc_msgForward_impcache) {
 355             _cache_free_block(entry);
 356         }
 357     }
 358
 359     _cache_free_block(cache);
 360 }
 361
 362
 363 /***********************************************************************
 364 * _cache_create.
 365 *
 366 * Called from _cache_expand().
 367 * Cache locks: cacheUpdateLock must be held by the caller.
 368 **********************************************************************/
 369 static Cache _cache_create(Class cls)
 370 {
 371     Cache new_cache;
 372
 373     cacheUpdateLock.assertLocked();
 374
 375     // Allocate new cache block
 376     new_cache = _cache_malloc(INIT_CACHE_SIZE);
 377
 378     // Install the cache
 379     cls->cache = new_cache;
 380
 381     // Clear the grow flag so that we will re-use the current storage,
 382     // rather than actually grow the cache, when expanding the cache
 383     // for the first time
 384     if (_class_slow_grow) {
 385         cls->setShouldGrowCache(false);
 386     }
 387
 388     // Return our creation
 389     return new_cache;
 390 }
 391
 392
 393 /***********************************************************************
 394 * _cache_expand.
 395 *
 396 * Called from _cache_fill ()
 397 * Cache locks: cacheUpdateLock must be held by the caller.
 398 **********************************************************************/
 399 static Cache _cache_expand(Class cls)
 400 {
 401     Cache old_cache;
 402     Cache new_cache;
 403     uintptr_t slotCount;
 404     uintptr_t index;
 405
 406     cacheUpdateLock.assertLocked();
 407
 408     // First growth goes from empty cache to a real one
 409     old_cache = cls->cache;
 410     if (_cache_isEmpty(old_cache))
 411         return _cache_create (cls);
 412
 413     if (_class_slow_grow) {
 414         // Cache grows every other time only.
 415         if (cls->shouldGrowCache()) {
 416             // Grow the cache this time. Don't grow next time.
 417             cls->setShouldGrowCache(false);
 418         }
 419         else {
 420             // Reuse the current cache storage this time. Do grow next time.
 421             cls->setShouldGrowCache(true);
 422
 423             // Clear the valid-entry counter
 424             old_cache->occupied = 0;
 425
 426             // Invalidate all the cache entries
 427             for (index = 0; index < old_cache->mask + 1; index += 1)
 428             {
 429                 // Remember what this entry was, so we can possibly
 430                 // deallocate it after the bucket has been invalidated
 431                 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
 432
 433                 // Skip invalid entry
 434                 if (!oldEntry)
 435                     continue;
 436
 437                 // Invalidate this entry
 438                 old_cache->buckets[index] = NULL;
 439
 440                 // Deallocate "forward::" entry
 441                 if (oldEntry->imp == _objc_msgForward_impcache) {
 442                     _cache_collect_free (oldEntry, sizeof(cache_entry));
 443                 }
 444             }
 445
 446             // Return the same old cache, freshly emptied
 447             return old_cache;
 448         }
 449     }
 450
 451     // Double the cache size
 452     slotCount = (old_cache->mask + 1) << 1;
 453
 454     new_cache = _cache_malloc(slotCount);
 455
 456 #ifdef OBJC_INSTRUMENTED
 457     // Propagate the instrumentation data
 458     {
 459         CacheInstrumentation *oldCacheData;
 460         CacheInstrumentation *newCacheData;
 461
 462         oldCacheData = CACHE_INSTRUMENTATION(old_cache);
 463         newCacheData = CACHE_INSTRUMENTATION(new_cache);
 464         bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
 465     }
 466 #endif
 467
 468     // Deallocate "forward::" entries from the old cache
 469     for (index = 0; index < old_cache->mask + 1; index++) {
 470         cache_entry *entry = (cache_entry *)old_cache->buckets[index];
 471         if (entry && entry->imp == _objc_msgForward_impcache) {
 472             _cache_collect_free (entry, sizeof(cache_entry));
 473         }
 474     }
 475
 476     // Install new cache
 477     cls->cache = new_cache;
 478
 479     // Deallocate old cache, try freeing all the garbage
 480     _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *));
 481     _cache_collect(false);
 482
 483     return new_cache;
 484 }
 485
 486
 487 /***********************************************************************
 488 * _cache_fill.  Add the specified method to the specified class' cache.
 489 * Returns NO if the cache entry wasn't added: cache was busy,
 490 *  class is still being initialized, new entry is a duplicate.
 491 *
 492 * Called only from _class_lookupMethodAndLoadCache and
 493 * class_respondsToMethod and _cache_addForwardEntry.
 494 *
 495 * Cache locks: cacheUpdateLock must not be held.
 496 **********************************************************************/
 497 bool _cache_fill(Class cls, Method smt, SEL sel)
 498 {
 499     uintptr_t newOccupied;
 500     uintptr_t index;
 501     cache_entry **buckets;
 502     cache_entry *entry;
 503     Cache cache;
 504
 505     cacheUpdateLock.assertUnlocked();
 506
 507     // Never cache before +initialize is done
 508     if (!cls->isInitialized()) {
 509         return NO;
 510     }
 511
 512     // Keep tally of cache additions
 513     totalCacheFills += 1;
 514
 515     mutex_locker_t lock(cacheUpdateLock);
 516
 517     entry = (cache_entry *)smt;
 518
 519     cache = cls->cache;
 520
 521     // Make sure the entry wasn't added to the cache by some other thread
 522     // before we grabbed the cacheUpdateLock.
 523     // Don't use _cache_getMethod() because _cache_getMethod() doesn't
 524     // return forward:: entries.
 525     if (_cache_getImp(cls, sel)) {
 526         return NO; // entry is already cached, didn't add new one
 527     }
 528
 529     // Use the cache as-is if it is less than 3/4 full
 530     newOccupied = cache->occupied + 1;
 531     if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
 532         // Cache is less than 3/4 full.
 533         cache->occupied = (unsigned int)newOccupied;
 534     } else {
 535         // Cache is too full. Expand it.
 536         cache = _cache_expand (cls);
 537
 538         // Account for the addition
 539         cache->occupied += 1;
 540     }
 541
 542     // Scan for the first unused slot and insert there.
 543     // There is guaranteed to be an empty slot because the
 544     // minimum size is 4 and we resized at 3/4 full.
 545     buckets = (cache_entry **)cache->buckets;
 546     for (index = CACHE_HASH(sel, cache->mask);
 547          buckets[index] != NULL;
 548          index = (index+1) & cache->mask)
 549     {
 550         // empty
 551     }
 552     buckets[index] = entry;
 553
 554     return YES; // successfully added new cache entry
 555 }
 556
 557
 558 /***********************************************************************
 559 * _cache_addForwardEntry
 560 * Add a forward:: entry  for the given selector to cls's method cache.
 561 * Does nothing if the cache addition fails for any reason.
 562 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
 563 * Cache locks: cacheUpdateLock must not be held.
 564 **********************************************************************/
 565 void _cache_addForwardEntry(Class cls, SEL sel)
 566 {
 567     cache_entry *smt;
 568
 569     smt = (cache_entry *)malloc(sizeof(cache_entry));
 570     smt->name = sel;
 571     smt->imp = _objc_msgForward_impcache;
 572     if (! _cache_fill(cls, (Method)smt, sel)) {  // fixme hack
 573         // Entry not added to cache. Don't leak the method struct.
 574         free(smt);
 575     }
 576 }
 577
 578
 579 /***********************************************************************
 580 * _cache_flush.  Invalidate all valid entries in the given class' cache.
 581 *
 582 * Called from flush_caches() and _cache_fill()
 583 * Cache locks: cacheUpdateLock must be held by the caller.
 584 **********************************************************************/
 585 void _cache_flush(Class cls)
 586 {
 587     Cache cache;
 588     unsigned int index;
 589
 590     cacheUpdateLock.assertLocked();
 591
 592     // Locate cache.  Ignore unused cache.
 593     cache = cls->cache;
 594     if (_cache_isEmpty(cache)) return;
 595
 596 #ifdef OBJC_INSTRUMENTED
 597     {
 598         CacheInstrumentation *cacheData;
 599
 600         // Tally this flush
 601         cacheData = CACHE_INSTRUMENTATION(cache);
 602         cacheData->flushCount += 1;
 603         cacheData->flushedEntries += cache->occupied;
 604         if (cache->occupied > cacheData->maxFlushedEntries)
 605             cacheData->maxFlushedEntries = cache->occupied;
 606     }
 607 #endif
 608
 609     // Traverse the cache
 610     for (index = 0; index <= cache->mask; index += 1)
 611     {
 612         // Remember what this entry was, so we can possibly
 613         // deallocate it after the bucket has been invalidated
 614         cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
 615
 616         // Invalidate this entry
 617         cache->buckets[index] = NULL;
 618
 619         // Deallocate "forward::" entry
 620         if (oldEntry && oldEntry->imp == _objc_msgForward_impcache)
 621             _cache_collect_free (oldEntry, sizeof(cache_entry));
 622     }
 623
 624     // Clear the valid-entry counter
 625     cache->occupied = 0;
 626 }
 627
 628
 629 /***********************************************************************
 630 * flush_cache.  Flushes the instance method cache for class cls only.
 631 * Use flush_caches() if cls might have in-use subclasses.
 632 **********************************************************************/
 633 void flush_cache(Class cls)
 634 {
 635     if (cls) {
 636         mutex_locker_t lock(cacheUpdateLock);
 637         _cache_flush(cls);
 638     }
 639 }
 640
 641
 642 /***********************************************************************
 643 * cache collection.
 644 **********************************************************************/
 645
 646 #if !TARGET_OS_WIN32
 647
 648 // A sentinel (magic value) to report bad thread_get_state status.
 649 // Must not be a valid PC.
 650 // Must not be zero - thread_get_state() on a new thread returns PC == 0.
 651 #define PC_SENTINEL  1
 652
 653 // UNIX03 compliance hack (4508809)
 654 #if !__DARWIN_UNIX03
 655 #define __srr0 srr0
 656 #define __eip eip
 657 #endif
 658
 659 static uintptr_t _get_pc_for_thread(thread_t thread)
 660 #if defined(__i386__)
 661 {
 662     i386_thread_state_t state;
 663     unsigned int count = i386_THREAD_STATE_COUNT;
 664     kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
 665     return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
 666 }
 667 #elif defined(__x86_64__)
 668 {
 669     x86_thread_state64_t                        state;
 670     unsigned int count = x86_THREAD_STATE64_COUNT;
 671     kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
 672     return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
 673 }
 674 #elif defined(__arm__)
 675 {
 676     arm_thread_state_t state;
 677     unsigned int count = ARM_THREAD_STATE_COUNT;
 678     kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
 679     return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
 680 }
 681 #else
 682 {
 683 #error _get_pc_for_thread () not implemented for this architecture
 684 }
 685 #endif
 686
 687 #endif
 688
 689 /***********************************************************************
 690 * _collecting_in_critical.
 691 * Returns TRUE if some thread is currently executing a cache-reading
 692 * function. Collection of cache garbage is not allowed when a cache-
 693 * reading function is in progress because it might still be using
 694 * the garbage memory.
 695 **********************************************************************/
 696 typedef struct {
 697     uint64_t          location;
 698     unsigned short    length;
 699     unsigned short    recovery_offs;
 700     unsigned int      flags;
 701 } task_restartable_range_t;
 702
 703 extern "C" task_restartable_range_t objc_restartableRanges[];
 704
 705 static int _collecting_in_critical(void)
 706 {
 707 #if TARGET_OS_WIN32
 708     return TRUE;
 709 #else
 710     thread_act_port_array_t threads;
 711     unsigned number;
 712     unsigned count;
 713     kern_return_t ret;
 714     int result;
 715
 716     mach_port_t mythread = pthread_mach_thread_np(objc_thread_self());
 717
 718     // Get a list of all the threads in the current task
 719     ret = task_threads (mach_task_self (), &threads, &number);
 720     if (ret != KERN_SUCCESS)
 721     {
 722         _objc_fatal("task_thread failed (result %d)\n", ret);
 723     }
 724
 725     // Check whether any thread is in the cache lookup code
 726     result = FALSE;
 727     for (count = 0; count < number; count++)
 728     {
 729         int region;
 730         uintptr_t pc;
 731
 732         // Don't bother checking ourselves
 733         if (threads[count] == mythread)
 734             continue;
 735
 736         // Find out where thread is executing
 737         pc = _get_pc_for_thread (threads[count]);
 738
 739         // Check for bad status, and if so, assume the worse (can't collect)
 740         if (pc == PC_SENTINEL)
 741         {
 742             result = TRUE;
 743             goto done;
 744         }
 745
 746         // Check whether it is in the cache lookup code
 747         for (region = 0; objc_restartableRanges[region].location != 0; region++)
 748         {
 749             uint32_t loc = (uint32_t)objc_restartableRanges[region].location;
 750             if ((pc > loc) &&
 751                 (pc - loc) < objc_restartableRanges[region].length)
 752             {
 753                 result = TRUE;
 754                 goto done;
 755             }
 756         }
 757     }
 758
 759  done:
 760     // Deallocate the port rights for the threads
 761     for (count = 0; count < number; count++) {
 762         mach_port_deallocate(mach_task_self (), threads[count]);
 763     }
 764
 765     // Deallocate the thread list
 766     vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
 767
 768     // Return our finding
 769     return result;
 770 #endif
 771 }
 772
 773
 774 /***********************************************************************
 775 * _garbage_make_room.  Ensure that there is enough room for at least
 776 * one more ref in the garbage.
 777 **********************************************************************/
 778
 779 // amount of memory represented by all refs in the garbage
 780 static size_t garbage_byte_size = 0;
 781
 782 // do not empty the garbage until garbage_byte_size gets at least this big
 783 static size_t garbage_threshold = 1024;
 784
 785 // table of refs to free
 786 static void **garbage_refs = 0;
 787
 788 // current number of refs in garbage_refs
 789 static size_t garbage_count = 0;
 790
 791 // capacity of current garbage_refs
 792 static size_t garbage_max = 0;
 793
 794 // capacity of initial garbage_refs
 795 enum {
 796     INIT_GARBAGE_COUNT = 128
 797 };
 798
 799 static void _garbage_make_room(void)
 800 {
 801     static int first = 1;
 802
 803     // Create the collection table the first time it is needed
 804     if (first)
 805     {
 806         first = 0;
 807         garbage_refs = (void**)
 808             malloc(INIT_GARBAGE_COUNT * sizeof(void *));
 809         garbage_max = INIT_GARBAGE_COUNT;
 810     }
 811
 812     // Double the table if it is full
 813     else if (garbage_count == garbage_max)
 814     {
 815         garbage_refs = (void**)
 816             realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
 817         garbage_max *= 2;
 818     }
 819 }
 820
 821
 822 /***********************************************************************
 823 * _cache_collect_free.  Add the specified malloc'd memory to the list
 824 * of them to free at some later point.
 825 * size is used for the collection threshold. It does not have to be
 826 * precisely the block's size.
 827 * Cache locks: cacheUpdateLock must be held by the caller.
 828 **********************************************************************/
 829 static void _cache_collect_free(void *data, size_t size)
 830 {
 831     cacheUpdateLock.assertLocked();
 832
 833     _garbage_make_room ();
 834     garbage_byte_size += size;
 835     garbage_refs[garbage_count++] = data;
 836 }
 837
 838
 839 /***********************************************************************
 840 * _cache_collect.  Try to free accumulated dead caches.
 841 * collectALot tries harder to free memory.
 842 * Cache locks: cacheUpdateLock must be held by the caller.
 843 **********************************************************************/
 844 void _cache_collect(bool collectALot)
 845 {
 846     cacheUpdateLock.assertLocked();
 847
 848     // Done if the garbage is not full
 849     if (garbage_byte_size < garbage_threshold  &&  !collectALot) {
 850         return;
 851     }
 852
 853     // Synchronize collection with objc_msgSend and other cache readers
 854     if (!collectALot) {
 855         if (_collecting_in_critical ()) {
 856             // objc_msgSend (or other cache reader) is currently looking in
 857             // the cache and might still be using some garbage.
 858             if (PrintCaches) {
 859                 _objc_inform ("CACHES: not collecting; "
 860                               "objc_msgSend in progress");
 861             }
 862             return;
 863         }
 864     }
 865     else {
 866         // No excuses.
 867         while (_collecting_in_critical())
 868             ;
 869     }
 870
 871     // No cache readers in progress - garbage is now deletable
 872
 873     // Log our progress
 874     if (PrintCaches) {
 875         cache_collections++;
 876         _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
 877     }
 878
 879     // Dispose all refs now in the garbage
 880     while (garbage_count--) {
 881         _cache_free_block(garbage_refs[garbage_count]);
 882     }
 883
 884     // Clear the garbage count and total size indicator
 885     garbage_count = 0;
 886     garbage_byte_size = 0;
 887
 888     if (PrintCaches) {
 889         size_t i;
 890         size_t total = 0;
 891         size_t ideal_total = 0;
 892         size_t malloc_total = 0;
 893         size_t local_total = 0;
 894
 895         for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
 896             int count = cache_counts[i];
 897             int slots = 1 << i;
 898             size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
 899             size_t ideal = size;
 900 #if TARGET_OS_WIN32
 901             size_t malloc = size;
 902 #else
 903             size_t malloc = malloc_good_size(size);
 904 #endif
 905             size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
 906
 907             if (!count) continue;
 908
 909             _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
 910
 911             total += count;
 912             ideal_total += ideal*count;
 913             malloc_total += malloc*count;
 914             local_total += local*count;
 915         }
 916
 917         _objc_inform("CACHES:      total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
 918     }
 919 }
 920
 921
 922
 923
 924
 925 #if defined(CACHE_ALLOCATOR)
 926
 927 /***********************************************************************
 928 * Custom method cache allocator.
 929 * Method cache block sizes are 2^slots+2 words, which is a pessimal
 930 * case for the system allocator. It wastes 504 bytes per cache block
 931 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
 932 * To save memory, the custom cache allocator below is used.
 933 *
 934 * The cache allocator uses 128 KB allocation regions. Few processes will
 935 * require a second region. Within a region, allocation is address-ordered
 936 * first fit.
 937 *
 938 * The cache allocator uses a quantum of 520.
 939 * Cache block ideal sizes: 520, 1032, 2056, 4104
 940 * Cache allocator sizes:   520, 1040, 2080, 4160
 941 *
 942 * Because all blocks are known to be genuine method caches, the ordinary
 943 * cache->mask and cache->occupied fields are used as block headers.
 944 * No out-of-band headers are maintained. The number of blocks will
 945 * almost always be fewer than 200, so for simplicity there is no free
 946 * list or other optimization.
 947 *
 948 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
 949 * Block free:   mask != 0, occupied == -1 (mask is precisely block size)
 950 *
 951 * No cache allocator functions take any locks. Instead, the caller
 952 * must hold the cacheUpdateLock.
 953 *
 954 * fixme with 128 KB regions and 520 B min block size, an allocation
 955 * bitmap would be only 32 bytes - better than free list?
 956 **********************************************************************/
 957
 958 typedef struct cache_allocator_block {
 959     uintptr_t size;
 960     uintptr_t state;
 961     struct cache_allocator_block *nextFree;
 962 } cache_allocator_block;
 963
 964 typedef struct cache_allocator_region {
 965     cache_allocator_block *start;
 966     cache_allocator_block *end;    // first non-block address
 967     cache_allocator_block *freeList;
 968     struct cache_allocator_region *next;
 969 } cache_allocator_region;
 970
 971 static cache_allocator_region *cacheRegion = NULL;
 972
 973
 974 /***********************************************************************
 975 * cache_allocator_add_region
 976 * Allocates and returns a new region that can hold at least size
 977 *   bytes of large method caches.
 978 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
 979 *   with a minimum of CACHE_REGION_SIZE.
 980 * The new region is lowest-priority for new allocations. Callers that
 981 *   know the other regions are already full should allocate directly
 982 *   into the returned region.
 983 **********************************************************************/
 984 static cache_allocator_region *cache_allocator_add_region(size_t size)
 985 {
 986     vm_address_t addr;
 987     cache_allocator_block *b;
 988     cache_allocator_region **rgnP;
 989     cache_allocator_region *newRegion = (cache_allocator_region *)
 990         calloc(1, sizeof(cache_allocator_region));
 991
 992     // Round size up to quantum boundary, and apply the minimum size.
 993     size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
 994     if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
 995
 996     // Allocate the region
 997     addr = (vm_address_t)calloc(size, 1);
 998     newRegion->start = (cache_allocator_block *)addr;
 999     newRegion->end = (cache_allocator_block *)(addr + size);
1000
1001     // Mark the first block: free and covers the entire region
1002     b = newRegion->start;
1003     b->size = size;
1004     b->state = (uintptr_t)-1;
1005     b->nextFree = NULL;
1006     newRegion->freeList = b;
1007
1008     // Add to end of the linked list of regions.
1009     // Other regions should be re-used before this one is touched.
1010     newRegion->next = NULL;
1011     rgnP = &cacheRegion;
1012     while (*rgnP) {
1013         rgnP = &(**rgnP).next;
1014     }
1015     *rgnP = newRegion;
1016
1017     cache_allocator_regions++;
1018
1019     return newRegion;
1020 }
1021
1022
1023 /***********************************************************************
1024 * cache_allocator_coalesce
1025 * Attempts to coalesce a free block with the single free block following
1026 * it in the free list, if any.
1027 **********************************************************************/
1028 static void cache_allocator_coalesce(cache_allocator_block *block)
1029 {
1030     if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1031         block->size += block->nextFree->size;
1032         block->nextFree = block->nextFree->nextFree;
1033     }
1034 }
1035
1036
1037 /***********************************************************************
1038 * cache_region_calloc
1039 * Attempt to allocate a size-byte block in the given region.
1040 * Allocation is first-fit. The free list is already fully coalesced.
1041 * Returns NULL if there is not enough room in the region for the block.
1042 **********************************************************************/
1043 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1044 {
1045     cache_allocator_block **blockP;
1046     uintptr_t mask;
1047
1048     // Save mask for allocated block, then round size
1049     // up to CACHE_QUANTUM boundary
1050     mask = cache_allocator_mask_for_size(size);
1051     size = cache_allocator_size_for_mask(mask);
1052
1053     // Search the free list for a sufficiently large free block.
1054
1055     for (blockP = &rgn->freeList;
1056          *blockP != NULL;
1057          blockP = &(**blockP).nextFree)
1058     {
1059         cache_allocator_block *block = *blockP;
1060         if (block->size < size) continue;  // not big enough
1061
1062         // block is now big enough. Allocate from it.
1063
1064         // Slice off unneeded fragment of block, if any,
1065         // and reconnect the free list around block.
1066         if (block->size - size >= CACHE_QUANTUM) {
1067             cache_allocator_block *leftover =
1068                 (cache_allocator_block *)(size + (uintptr_t)block);
1069             leftover->size = block->size - size;
1070             leftover->state = (uintptr_t)-1;
1071             leftover->nextFree = block->nextFree;
1072             *blockP = leftover;
1073         } else {
1074             *blockP = block->nextFree;
1075         }
1076
1077         // block is now exactly the right size.
1078
1079         bzero(block, size);
1080         block->size = mask;  // Cache->mask
1081         block->state = 0;    // Cache->occupied
1082
1083         return block;
1084     }
1085
1086     // No room in this region.
1087     return NULL;
1088 }
1089
1090
1091 /***********************************************************************
1092 * cache_allocator_calloc
1093 * Custom allocator for large method caches (128+ slots)
1094 * The returned cache block already has cache->mask set.
1095 * cache->occupied and the cache contents are zero.
1096 * Cache locks: cacheUpdateLock must be held by the caller
1097 **********************************************************************/
1098 static Cache cache_allocator_calloc(size_t size)
1099 {
1100     cache_allocator_region *rgn;
1101
1102     cacheUpdateLock.assertLocked();
1103
1104     for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1105         void *p = cache_region_calloc(rgn, size);
1106         if (p) {
1107             return (Cache)p;
1108         }
1109     }
1110
1111     // No regions or all regions full - make a region and try one more time
1112     // In the unlikely case of a cache over 256KB, it will get its own region.
1113     return (Cache)cache_region_calloc(cache_allocator_add_region(size), size);
1114 }
1115
1116
1117 /***********************************************************************
1118 * cache_allocator_region_for_block
1119 * Returns the cache allocator region that ptr points into, or NULL.
1120 **********************************************************************/
1121 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1122 {
1123     cache_allocator_region *rgn;
1124     for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1125         if (block >= rgn->start  &&  block < rgn->end) return rgn;
1126     }
1127     return NULL;
1128 }
1129
1130
1131 /***********************************************************************
1132 * cache_allocator_is_block
1133 * If ptr is a live block from the cache allocator, return YES
1134 * If ptr is a block from some other allocator, return NO.
1135 * If ptr is a dead block from the cache allocator, result is undefined.
1136 * Cache locks: cacheUpdateLock must be held by the caller
1137 **********************************************************************/
1138 static bool cache_allocator_is_block(void *ptr)
1139 {
1140     cacheUpdateLock.assertLocked();
1141     return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1142 }
1143
1144 /***********************************************************************
1145 * cache_allocator_free
1146 * Frees a block allocated by the cache allocator.
1147 * Cache locks: cacheUpdateLock must be held by the caller.
1148 **********************************************************************/
1149 static void cache_allocator_free(void *ptr)
1150 {
1151     cache_allocator_block *dead = (cache_allocator_block *)ptr;
1152     cache_allocator_block *cur;
1153     cache_allocator_region *rgn;
1154
1155     cacheUpdateLock.assertLocked();
1156
1157     if (! (rgn = cache_allocator_region_for_block(dead))) {
1158         // free of non-pointer
1159         _objc_inform("cache_allocator_free of non-pointer %p", dead);
1160         return;
1161     }
1162
1163     dead->size = cache_allocator_size_for_mask(dead->size);
1164     dead->state = (uintptr_t)-1;
1165
1166     if (!rgn->freeList  ||  rgn->freeList > dead) {
1167         // dead block belongs at front of free list
1168         dead->nextFree = rgn->freeList;
1169         rgn->freeList = dead;
1170         cache_allocator_coalesce(dead);
1171         return;
1172     }
1173
1174     // dead block belongs in the middle or end of free list
1175     for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1176         cache_allocator_block *ahead = cur->nextFree;
1177
1178         if (!ahead  ||  ahead > dead) {
1179             // cur and ahead straddle dead, OR dead belongs at end of free list
1180             cur->nextFree = dead;
1181             dead->nextFree = ahead;
1182
1183             // coalesce into dead first in case both succeed
1184             cache_allocator_coalesce(dead);
1185             cache_allocator_coalesce(cur);
1186             return;
1187         }
1188     }
1189
1190     // uh-oh
1191     _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1192 }
1193
1194 // defined(CACHE_ALLOCATOR)
1195 #endif
1196
1197 /***********************************************************************
1198 * Cache instrumentation and debugging
1199 **********************************************************************/
1200
1201 #ifdef OBJC_INSTRUMENTED
1202 enum {
1203     CACHE_HISTOGRAM_SIZE        = 512
1204 };
1205
1206 unsigned int    CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1207 unsigned int    CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1208 #endif
1209
1210
1211 /***********************************************************************
1212 * _cache_print.
1213 **********************************************************************/
1214 static void _cache_print(Cache cache)
1215 {
1216     uintptr_t index;
1217     uintptr_t count;
1218
1219     count = cache->mask + 1;
1220     for (index = 0; index < count; index += 1) {
1221         cache_entry *entry = (cache_entry *)cache->buckets[index];
1222         if (entry) {
1223             if (entry->imp == _objc_msgForward_impcache)
1224                 printf ("does not recognize: \n");
1225             printf ("%s\n", sel_getName(entry->name));
1226         }
1227     }
1228 }
1229
1230
1231 /***********************************************************************
1232 * _class_printMethodCaches.
1233 **********************************************************************/
1234 void _class_printMethodCaches(Class cls)
1235 {
1236     if (_cache_isEmpty(cls->cache)) {
1237         printf("no instance-method cache for class %s\n",cls->nameForLogging());
1238     } else {
1239         printf("instance-method cache for class %s:\n", cls->nameForLogging());
1240         _cache_print(cls->cache);
1241     }
1242
1243     if (_cache_isEmpty(cls->ISA()->cache)) {
1244         printf("no class-method cache for class %s\n", cls->nameForLogging());
1245     } else {
1246         printf ("class-method cache for class %s:\n", cls->nameForLogging());
1247         _cache_print(cls->ISA()->cache);
1248     }
1249 }
1250
1251
1252 #if 0
1253 #warning fixme
1254
1255
1256 /***********************************************************************
1257 * _class_printDuplicateCacheEntries.
1258 **********************************************************************/
1259 void _class_printDuplicateCacheEntries(bool detail)
1260 {
1261     NXHashState state;
1262     Class cls;
1263     unsigned int duplicates;
1264     unsigned int index1;
1265     unsigned int index2;
1266     unsigned int mask;
1267     unsigned int count;
1268     unsigned int isMeta;
1269     Cache cache;
1270
1271
1272     printf ("Checking for duplicate cache entries \n");
1273
1274     // Outermost loop - iterate over all classes
1275     state = NXInitHashState (class_hash);
1276     duplicates = 0;
1277     while (NXNextHashState (class_hash, &state, (void **) &cls))
1278     {
1279         // Control loop - do given class' cache, then its isa's cache
1280         for (isMeta = 0; isMeta <= 1; isMeta += 1)
1281         {
1282             // Select cache of interest and make sure it exists
1283             cache = (isMeta ? cls->ISA : cls)->cache;
1284             if (_cache_isEmpty(cache))
1285                 continue;
1286
1287             // Middle loop - check each entry in the given cache
1288             mask  = cache->mask;
1289             count = mask + 1;
1290             for (index1 = 0; index1 < count; index1 += 1)
1291             {
1292                 // Skip invalid entry
1293                 if (!cache->buckets[index1])
1294                     continue;
1295
1296                 // Inner loop - check that given entry matches no later entry
1297                 for (index2 = index1 + 1; index2 < count; index2 += 1)
1298                 {
1299                     // Skip invalid entry
1300                     if (!cache->buckets[index2])
1301                         continue;
1302
1303                     // Check for duplication by method name comparison
1304                     if (strcmp ((char *) cache->buckets[index1]->name),
1305                                 (char *) cache->buckets[index2]->name)) == 0)
1306                     {
1307                         if (detail)
1308                             printf ("%s %s\n", cls->nameForLogging(), sel_getName(cache->buckets[index1]->name));
1309                         duplicates += 1;
1310                         break;
1311                     }
1312                 }
1313             }
1314         }
1315     }
1316
1317     // Log the findings
1318     printf ("duplicates = %d\n", duplicates);
1319     printf ("total cache fills = %d\n", totalCacheFills);
1320 }
1321
1322
1323 /***********************************************************************
1324 * PrintCacheHeader.
1325 **********************************************************************/
1326 static void PrintCacheHeader(void)
1327 {
1328 #ifdef OBJC_INSTRUMENTED
1329     printf ("Cache  Cache  Slots  Avg    Max   AvgS  MaxS  AvgS  MaxS  TotalD   AvgD  MaxD  TotalD   AvgD  MaxD  TotD  AvgD  MaxD\n");
1330     printf ("Size   Count  Used   Used   Used  Hit   Hit   Miss  Miss  Hits     Prbs  Prbs  Misses   Prbs  Prbs  Flsh  Flsh  Flsh\n");
1331     printf ("-----  -----  -----  -----  ----  ----  ----  ----  ----  -------  ----  ----  -------  ----  ----  ----  ----  ----\n");
1332 #else
1333     printf ("Cache  Cache  Slots  Avg    Max   AvgS  MaxS  AvgS  MaxS\n");
1334     printf ("Size   Count  Used   Used   Used  Hit   Hit   Miss  Miss\n");
1335     printf ("-----  -----  -----  -----  ----  ----  ----  ----  ----\n");
1336 #endif
1337 }
1338
1339
1340 /***********************************************************************
1341 * PrintCacheInfo.
1342 **********************************************************************/
1343 static void PrintCacheInfo(unsigned int cacheSize,
1344                            unsigned int cacheCount,
1345                            unsigned int slotsUsed,
1346                            float avgUsed, unsigned int maxUsed,
1347                            float avgSHit, unsigned int maxSHit,
1348                            float avgSMiss, unsigned int maxSMiss
1349 #ifdef OBJC_INSTRUMENTED
1350                            , unsigned int totDHits,
1351                            float avgDHit,
1352                            unsigned int maxDHit,
1353                            unsigned int totDMisses,
1354                            float avgDMiss,
1355                            unsigned int maxDMiss,
1356                            unsigned int totDFlsh,
1357                            float avgDFlsh,
1358                            unsigned int maxDFlsh
1359 #endif
1360                            )
1361 {
1362 #ifdef OBJC_INSTRUMENTED
1363     printf ("%5u  %5u  %5u  %5.1f  %4u  %4.1f  %4u  %4.1f  %4u  %7u  %4.1f  %4u  %7u  %4.1f  %4u  %4u  %4.1f  %4u\n",
1364 #else
1365             printf ("%5u  %5u  %5u  %5.1f  %4u  %4.1f  %4u  %4.1f  %4u\n",
1366 #endif
1367                     cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1368 #ifdef OBJC_INSTRUMENTED
1369                     , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1370 #endif
1371                     );
1372
1373 }
1374
1375
1376 #ifdef OBJC_INSTRUMENTED
1377 /***********************************************************************
1378 * PrintCacheHistogram.  Show the non-zero entries from the specified
1379 * cache histogram.
1380 **********************************************************************/
1381 static void PrintCacheHistogram(char *title,
1382                                 unsigned int *firstEntry,
1383                                 unsigned int entryCount)
1384 {
1385     unsigned int index;
1386     unsigned int *thisEntry;
1387
1388     printf ("%s\n", title);
1389     printf ("    Probes    Tally\n");
1390     printf ("    ------    -----\n");
1391     for (index = 0, thisEntry = firstEntry;
1392          index < entryCount;
1393          index += 1, thisEntry += 1)
1394     {
1395         if (*thisEntry == 0)
1396             continue;
1397
1398         printf ("    %6d    %5d\n", index, *thisEntry);
1399     }
1400 }
1401 #endif
1402
1403
1404 /***********************************************************************
1405 * _class_printMethodCacheStatistics.
1406 **********************************************************************/
1407
1408 #define MAX_LOG2_SIZE   32
1409 #define MAX_CHAIN_SIZE  100
1410
1411 void _class_printMethodCacheStatistics(void)
1412 {
1413     unsigned int isMeta;
1414     unsigned int index;
1415     NXHashState state;
1416     Class cls;
1417     unsigned int totalChain;
1418     unsigned int totalMissChain;
1419     unsigned int maxChain;
1420     unsigned int maxMissChain;
1421     unsigned int classCount;
1422     unsigned int negativeEntryCount;
1423     unsigned int cacheExpandCount;
1424     unsigned int cacheCountBySize[2][MAX_LOG2_SIZE]        = {{0}};
1425     unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE]      = {{0}};
1426     unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE]        = {{0}};
1427     unsigned int totalChainBySize[2][MAX_LOG2_SIZE]        = {{0}};
1428     unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE]    = {{0}};
1429     unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE]     = {{0}};
1430     unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1431     unsigned int maxChainBySize[2][MAX_LOG2_SIZE]          = {{0}};
1432     unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE]      = {{0}};
1433     unsigned int chainCount[MAX_CHAIN_SIZE]                = {0};
1434     unsigned int missChainCount[MAX_CHAIN_SIZE]            = {0};
1435 #ifdef OBJC_INSTRUMENTED
1436     unsigned int hitCountBySize[2][MAX_LOG2_SIZE]          = {{0}};
1437     unsigned int hitProbesBySize[2][MAX_LOG2_SIZE]         = {{0}};
1438     unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE]      = {{0}};
1439     unsigned int missCountBySize[2][MAX_LOG2_SIZE]         = {{0}};
1440     unsigned int missProbesBySize[2][MAX_LOG2_SIZE]        = {{0}};
1441     unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE]     = {{0}};
1442     unsigned int flushCountBySize[2][MAX_LOG2_SIZE]        = {{0}};
1443     unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE]    = {{0}};
1444     unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1445 #endif
1446
1447     printf ("Printing cache statistics\n");
1448
1449     // Outermost loop - iterate over all classes
1450     state = NXInitHashState (class_hash);
1451     classCount = 0;
1452     negativeEntryCount = 0;
1453     cacheExpandCount = 0;
1454     while (NXNextHashState (class_hash, &state, (void **) &cls))
1455     {
1456         // Tally classes
1457         classCount += 1;
1458
1459         // Control loop - do given class' cache, then its isa's cache
1460         for (isMeta = 0; isMeta <= 1; isMeta += 1)
1461         {
1462             Cache cache;
1463             unsigned int mask;
1464             unsigned int log2Size;
1465             unsigned int entryCount;
1466
1467             // Select cache of interest
1468             cache = (isMeta ? cls->ISA : cls)->cache;
1469
1470             // Ignore empty cache... should we?
1471             if (_cache_isEmpty(cache))
1472                 continue;
1473
1474             // Middle loop - do each entry in the given cache
1475             mask = cache->mask;
1476             entryCount = 0;
1477             totalChain = 0;
1478             totalMissChain = 0;
1479             maxChain = 0;
1480             maxMissChain = 0;
1481             for (index = 0; index < mask + 1; index += 1)
1482             {
1483                 cache_entry **buckets;
1484                 cache_entry *entry;
1485                 unsigned int hash;
1486                 unsigned int methodChain;
1487                 unsigned int methodMissChain;
1488                 unsigned int index2;
1489
1490                 // If entry is invalid, the only item of
1491                 // interest is that future insert hashes
1492                 // to this entry can use it directly.
1493                 buckets = (cache_entry **)cache->buckets;
1494                 if (!buckets[index])
1495                 {
1496                     missChainCount[0] += 1;
1497                     continue;
1498                 }
1499
1500                 entry = buckets[index];
1501
1502                 // Tally valid entries
1503                 entryCount += 1;
1504
1505                 // Tally "forward::" entries
1506                 if (entry->imp == _objc_msgForward_impcache)
1507                     negativeEntryCount += 1;
1508
1509                 // Calculate search distance (chain length) for this method
1510                 // The chain may wrap around to the beginning of the table.
1511                 hash = CACHE_HASH(entry->name, mask);
1512                 if (index >= hash) methodChain = index - hash;
1513                 else methodChain = (mask+1) + index - hash;
1514
1515                 // Tally chains of this length
1516                 if (methodChain < MAX_CHAIN_SIZE)
1517                     chainCount[methodChain] += 1;
1518
1519                 // Keep sum of all chain lengths
1520                 totalChain += methodChain;
1521
1522                 // Record greatest chain length
1523                 if (methodChain > maxChain)
1524                     maxChain = methodChain;
1525
1526                 // Calculate search distance for miss that hashes here
1527                 index2 = index;
1528                 while (buckets[index2])
1529                 {
1530                     index2 += 1;
1531                     index2 &= mask;
1532                 }
1533                 methodMissChain = ((index2 - index) & mask);
1534
1535                 // Tally miss chains of this length
1536                 if (methodMissChain < MAX_CHAIN_SIZE)
1537                     missChainCount[methodMissChain] += 1;
1538
1539                 // Keep sum of all miss chain lengths in this class
1540                 totalMissChain += methodMissChain;
1541
1542                 // Record greatest miss chain length
1543                 if (methodMissChain > maxMissChain)
1544                     maxMissChain = methodMissChain;
1545             }
1546
1547             // Factor this cache into statistics about caches of the same
1548             // type and size (all caches are a power of two in size)
1549             log2Size = log2u (mask + 1);
1550             cacheCountBySize[isMeta][log2Size] += 1;
1551             totalEntriesBySize[isMeta][log2Size] += entryCount;
1552             if (entryCount > maxEntriesBySize[isMeta][log2Size])
1553                 maxEntriesBySize[isMeta][log2Size] = entryCount;
1554             totalChainBySize[isMeta][log2Size] += totalChain;
1555             totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1556             totalMaxChainBySize[isMeta][log2Size] += maxChain;
1557             totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1558             if (maxChain > maxChainBySize[isMeta][log2Size])
1559                 maxChainBySize[isMeta][log2Size] = maxChain;
1560             if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1561                 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1562 #ifdef OBJC_INSTRUMENTED
1563             {
1564                 CacheInstrumentation *cacheData;
1565
1566                 cacheData = CACHE_INSTRUMENTATION(cache);
1567                 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1568                 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1569                 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1570                     maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1571                 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1572                 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1573                 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1574                     maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1575                 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1576                 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1577                 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1578                     maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1579             }
1580 #endif
1581             // Caches start with a power of two number of entries, and grow by doubling, so
1582             // we can calculate the number of times this cache has expanded
1583             cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1584         }
1585     }
1586
1587     {
1588         unsigned int cacheCountByType[2] = {0};
1589         unsigned int totalCacheCount     = 0;
1590         unsigned int totalEntries        = 0;
1591         unsigned int maxEntries          = 0;
1592         unsigned int totalSlots          = 0;
1593 #ifdef OBJC_INSTRUMENTED
1594         unsigned int totalHitCount       = 0;
1595         unsigned int totalHitProbes      = 0;
1596         unsigned int maxHitProbes        = 0;
1597         unsigned int totalMissCount      = 0;
1598         unsigned int totalMissProbes     = 0;
1599         unsigned int maxMissProbes       = 0;
1600         unsigned int totalFlushCount     = 0;
1601         unsigned int totalFlushedEntries = 0;
1602         unsigned int maxFlushedEntries   = 0;
1603 #endif
1604
1605         totalChain = 0;
1606         maxChain = 0;
1607         totalMissChain = 0;
1608         maxMissChain = 0;
1609
1610         // Sum information over all caches
1611         for (isMeta = 0; isMeta <= 1; isMeta += 1)
1612         {
1613             for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1614             {
1615                 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1616                 totalEntries += totalEntriesBySize[isMeta][index];
1617                 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1618                 totalChain += totalChainBySize[isMeta][index];
1619                 if (maxEntriesBySize[isMeta][index] > maxEntries)
1620                     maxEntries = maxEntriesBySize[isMeta][index];
1621                 if (maxChainBySize[isMeta][index] > maxChain)
1622                     maxChain   = maxChainBySize[isMeta][index];
1623                 totalMissChain += totalMissChainBySize[isMeta][index];
1624                 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1625                     maxMissChain = maxMissChainBySize[isMeta][index];
1626 #ifdef OBJC_INSTRUMENTED
1627                 totalHitCount += hitCountBySize[isMeta][index];
1628                 totalHitProbes += hitProbesBySize[isMeta][index];
1629                 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1630                     maxHitProbes = maxHitProbesBySize[isMeta][index];
1631                 totalMissCount += missCountBySize[isMeta][index];
1632                 totalMissProbes += missProbesBySize[isMeta][index];
1633                 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1634                     maxMissProbes = maxMissProbesBySize[isMeta][index];
1635                 totalFlushCount += flushCountBySize[isMeta][index];
1636                 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1637                 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1638                     maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1639 #endif
1640             }
1641
1642             totalCacheCount += cacheCountByType[isMeta];
1643         }
1644
1645         // Log our findings
1646         printf ("There are %u classes\n", classCount);
1647
1648         for (isMeta = 0; isMeta <= 1; isMeta += 1)
1649         {
1650             // Number of this type of class
1651             printf    ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1652                        cacheCountByType[isMeta],
1653                        isMeta ? "class" : "instance");
1654
1655             // Print header
1656             PrintCacheHeader ();
1657
1658             // Keep format consistent even if there are caches of this kind
1659             if (cacheCountByType[isMeta] == 0)
1660             {
1661                 printf ("(none)\n");
1662                 continue;
1663             }
1664
1665             // Usage information by cache size
1666             for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1667             {
1668                 unsigned int cacheCount;
1669                 unsigned int cacheSlotCount;
1670                 unsigned int cacheEntryCount;
1671
1672                 // Get number of caches of this type and size
1673                 cacheCount = cacheCountBySize[isMeta][index];
1674                 if (cacheCount == 0)
1675                     continue;
1676
1677                 // Get the cache slot count and the total number of valid entries
1678                 cacheSlotCount  = (1 << index);
1679                 cacheEntryCount = totalEntriesBySize[isMeta][index];
1680
1681                 // Give the analysis
1682                 PrintCacheInfo (cacheSlotCount,
1683                                 cacheCount,
1684                                 cacheEntryCount,
1685                                 (float) cacheEntryCount / (float) cacheCount,
1686                                 maxEntriesBySize[isMeta][index],
1687                                 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1688                                 maxChainBySize[isMeta][index],
1689                                 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1690                                 maxMissChainBySize[isMeta][index]
1691 #ifdef OBJC_INSTRUMENTED
1692                                 , hitCountBySize[isMeta][index],
1693                                 hitCountBySize[isMeta][index] ?
1694                                 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1695                                 maxHitProbesBySize[isMeta][index],
1696                                 missCountBySize[isMeta][index],
1697                                 missCountBySize[isMeta][index] ?
1698                                 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1699                                 maxMissProbesBySize[isMeta][index],
1700                                 flushCountBySize[isMeta][index],
1701                                 flushCountBySize[isMeta][index] ?
1702                                 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1703                                 maxFlushedEntriesBySize[isMeta][index]
1704 #endif
1705                                 );
1706             }
1707         }
1708
1709         // Give overall numbers
1710         printf ("\nCumulative:\n");
1711         PrintCacheHeader ();
1712         PrintCacheInfo (totalSlots,
1713                         totalCacheCount,
1714                         totalEntries,
1715                         (float) totalEntries / (float) totalCacheCount,
1716                         maxEntries,
1717                         (float) totalChain / (float) totalEntries,
1718                         maxChain,
1719                         (float) totalMissChain / (float) totalSlots,
1720                         maxMissChain
1721 #ifdef OBJC_INSTRUMENTED
1722                         , totalHitCount,
1723                         totalHitCount ?
1724                         (float) totalHitProbes / (float) totalHitCount : 0.0,
1725                         maxHitProbes,
1726                         totalMissCount,
1727                         totalMissCount ?
1728                         (float) totalMissProbes / (float) totalMissCount : 0.0,
1729                         maxMissProbes,
1730                         totalFlushCount,
1731                         totalFlushCount ?
1732                         (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1733                         maxFlushedEntries
1734 #endif
1735                         );
1736
1737         printf ("\nNumber of \"forward::\" entries: %d\n", negativeEntryCount);
1738         printf ("Number of cache expansions: %d\n", cacheExpandCount);
1739 #ifdef OBJC_INSTRUMENTED
1740         printf ("flush_caches:   total calls  total visits  average visits  max visits  total classes  visits/class\n");
1741         printf ("                -----------  ------------  --------------  ----------  -------------  -------------\n");
1742         printf ("  linear        %11u  %12u  %14.1f  %10u  %13u  %12.2f\n",
1743                 LinearFlushCachesCount,
1744                 LinearFlushCachesVisitedCount,
1745                 LinearFlushCachesCount ?
1746                 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1747                 MaxLinearFlushCachesVisitedCount,
1748                 LinearFlushCachesVisitedCount,
1749                 1.0);
1750         printf ("  nonlinear     %11u  %12u  %14.1f  %10u  %13u  %12.2f\n",
1751                 NonlinearFlushCachesCount,
1752                 NonlinearFlushCachesVisitedCount,
1753                 NonlinearFlushCachesCount ?
1754                 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1755                 MaxNonlinearFlushCachesVisitedCount,
1756                 NonlinearFlushCachesClassCount,
1757                 NonlinearFlushCachesClassCount ?
1758                 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1759         printf ("  ideal         %11u  %12u  %14.1f  %10u  %13u  %12.2f\n",
1760                 LinearFlushCachesCount + NonlinearFlushCachesCount,
1761                 IdealFlushCachesCount,
1762                 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1763                 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1764                 MaxIdealFlushCachesCount,
1765                 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1766                 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1767                 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1768
1769         PrintCacheHistogram ("\nCache hit histogram:",  &CacheHitHistogram[0],  CACHE_HISTOGRAM_SIZE);
1770         PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1771 #endif
1772
1773 #if 0
1774         printf ("\nLookup chains:");
1775         for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1776         {
1777             if (chainCount[index] != 0)
1778                 printf ("  %u:%u", index, chainCount[index]);
1779         }
1780
1781         printf ("\nMiss chains:");
1782         for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1783         {
1784             if (missChainCount[index] != 0)
1785                 printf ("  %u:%u", index, missChainCount[index]);
1786         }
1787
1788         printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1789                 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1790                 totalSlots * sizeof(cache_entry *) +
1791                 negativeEntryCount * sizeof(cache_entry));
1792 #endif
1793     }
1794 }
1795
1796 #endif
1797
1798 // !__OBJC2__
1799 #endif