1 /*
2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /***********************************************************************
25 * objc-cache.m
26 * Method cache management
27 * Cache flushing
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
32
33
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
36 *
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
41 *
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
54 *
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
61 *
62 * Cache readers (PC-checked by collecting_in_critical())
63 * objc_msgSend*
64 * _cache_getImp
65 * _cache_getMethod
66 *
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
75 *
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
77 * _cache_print
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
81 *
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward, so
90 * _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
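/***********************************************************************
 * Illustrative sketch (condensed from the code below, not additional
 * API): the writer-side sequence that the rules above describe.
 *
 *     OBJC_LOCK(&cacheUpdateLock);
 *     old_cache = _class_getCache(cls);
 *     _class_setCache(cls, new_cache);        // readers now find only new_cache
 *     _cache_collect_free(old_cache, size, YES); // old cache joins the garbage
 *                                             // list; it is freed only once no
 *                                             // thread's PC is inside a cache
 *                                             // reader (objc_msgSend et al.)
 *     OBJC_UNLOCK(&cacheUpdateLock);
 ***********************************************************************/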
93
94
95 #include <mach/mach.h>
96
97 #import "objc-private.h"
98 #import "hashtable2.h"
99
100 typedef struct {
101 SEL name; // same layout as struct old_method
102 void *unused;
103 IMP imp; // same layout as struct old_method
104 } cache_entry;
105
106 #if __OBJC2__
107
108 #ifndef __LP64__
109 #define CACHE_HASH(sel, mask) (((uintptr_t)(sel)>>2) & (mask))
110 #else
111 #define CACHE_HASH(sel, mask) (((unsigned int)((uintptr_t)(sel)>>3)) & (mask))
112 #endif
113
114 struct objc_cache {
115 unsigned int mask; /* total = mask + 1 */
116 unsigned int occupied;
117 cache_entry *buckets[1];
118 };
119
120 #define CACHE_BUCKET(e) ((cache_entry *)e)
121
122 #else
123
124 /* Most definitions are in runtime.h */
125 #define CACHE_BUCKET(e) ((Method)e)
126
127 #endif
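/* Example (illustrative address): using the non-LP64 definition above, a
 * selector at 0x0000a12c probes bucket CACHE_HASH(sel, mask). With
 * mask == 63 (a 64-slot cache) that is (0xa12c >> 2) & 0x3f == 0x0b, so
 * the search starts at bucket 11 and walks forward, wrapping at the mask. */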
128
129
130 /* When _class_slow_grow is non-zero, any given cache is actually grown
131 * only on the odd-numbered times it becomes full; on the even-numbered
132 * times, it is simply emptied and re-used. When this flag is zero,
133 * caches are grown every time. */
134 static const int _class_slow_grow = 1;
135
136 /* For min cache size: clear_cache=1, slow_grow=1
137 For max cache size: clear_cache=0, slow_grow=0 */
138
139
140 /* Lock for cache access.
141 * Held when modifying a cache in place.
142 * Held when installing a new cache on a class.
143 * Held when adding to the cache garbage list.
144 * Held when disposing cache garbage.
145 * See "Method cache locking" above for notes about cache locking. */
146 // classLock > cacheUpdateLock > infoLock
147 #if __OBJC2__
148 static
149 #else
150 __private_extern__
151 #endif
152 OBJC_DECLARE_LOCK(cacheUpdateLock);
153
154
155 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
156 enum {
157 INIT_CACHE_SIZE_LOG2 = 2,
158 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
159 };
160
161
162 /* Amount of space required for `count` hash table buckets, knowing that
163 * one entry is embedded in the cache structure itself. */
164 #define TABLE_SIZE(count) ((count - 1) * sizeof(cache_entry *))
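/* Example: the initial 4-slot cache (INIT_CACHE_SIZE below) is allocated as
 * sizeof(struct objc_cache) + TABLE_SIZE(4), i.e. the fixed header, which
 * already embeds buckets[0], plus three more bucket pointers. */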
165
166
167 /* Custom cache allocator parameters.
168 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
169 #define CACHE_QUANTUM 520
170 #define CACHE_REGION_SIZE 131040 // quantized just under 128KB (131072)
171 // #define CACHE_REGION_SIZE 262080 // quantized just under 256KB (262144)
172
173
174 /* Cache instrumentation data. Immediately follows the cache block itself. */
175 #ifdef OBJC_INSTRUMENTED
176 typedef struct
177 {
178 unsigned int hitCount; // cache lookup success tally
179 unsigned int hitProbes; // sum entries checked to hit
180 unsigned int maxHitProbes; // max entries checked to hit
181 unsigned int missCount; // cache lookup no-find tally
182 unsigned int missProbes; // sum entries checked to miss
183 unsigned int maxMissProbes; // max entries checked to miss
184 unsigned int flushCount; // cache flush tally
185 unsigned int flushedEntries; // sum cache entries flushed
186 unsigned int maxFlushedEntries; // max cache entries flushed
187 } CacheInstrumentation;
188
189 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
190 #endif
191
192 /* Cache filling and flushing instrumentation */
193
194 static int totalCacheFills NOBSS = 0;
195
196 #ifdef OBJC_INSTRUMENTED
197 __private_extern__ unsigned int LinearFlushCachesCount = 0;
198 __private_extern__ unsigned int LinearFlushCachesVisitedCount = 0;
199 __private_extern__ unsigned int MaxLinearFlushCachesVisitedCount = 0;
200 __private_extern__ unsigned int NonlinearFlushCachesCount = 0;
201 __private_extern__ unsigned int NonlinearFlushCachesClassCount = 0;
202 __private_extern__ unsigned int NonlinearFlushCachesVisitedCount = 0;
203 __private_extern__ unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
204 __private_extern__ unsigned int IdealFlushCachesCount = 0;
205 __private_extern__ unsigned int MaxIdealFlushCachesCount = 0;
206 #endif
207
208
209 /***********************************************************************
210 * A static empty cache. All classes initially point at this cache.
211 * When the first message is sent it misses in the cache, and when
212 * the cache is grown it checks for this case and uses malloc rather
213 * than realloc. This avoids the need to check for NULL caches in the
214 * messenger.
215 ***********************************************************************/
216
217 #ifndef OBJC_INSTRUMENTED
218 const struct objc_cache _objc_empty_cache =
219 {
220 0, // mask
221 0, // occupied
222 { NULL } // buckets
223 };
224 #else
225 // OBJC_INSTRUMENTED requires writable data immediately following emptyCache.
226 struct objc_cache _objc_empty_cache =
227 {
228 0, // mask
229 0, // occupied
230 { NULL } // buckets
231 };
232 CacheInstrumentation emptyCacheInstrumentation = {0};
233 #endif
234
235 #if __OBJC2__
236 IMP _objc_empty_vtable[128] = {0};
237 #endif
238
239
240 /* Local prototypes */
241
242 static BOOL _cache_isEmpty(Cache cache);
243 static Cache _cache_malloc(int slotCount);
244 static Cache _cache_create(Class cls);
245 static Cache _cache_expand(Class cls);
246 #if __OBJC2__
247 static void _cache_flush(Class cls);
248 #endif
249
250 static uintptr_t _get_pc_for_thread(mach_port_t thread);
251 static int _collecting_in_critical(void);
252 static void _garbage_make_room(void);
253 static void _cache_collect_free(void *data, size_t size, BOOL tryCollect);
254
255 #if !defined(__LP64__)
256 static BOOL cache_allocator_is_block(void *block);
257 static void *cache_allocator_calloc(size_t size);
258 static void cache_allocator_free(void *block);
259 #endif
260
261
262
263 /***********************************************************************
264 * _cache_isEmpty.
265 * Returns YES if the given cache is some empty cache.
266 * Empty caches should never be allocated on the heap.
267 **********************************************************************/
268 static BOOL _cache_isEmpty(Cache cache)
269 {
270 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
271 }
272
273
274 /***********************************************************************
275 * _cache_malloc.
276 *
277 * Called from _cache_create() and cache_expand()
278 * Cache locks: cacheUpdateLock must be held by the caller.
279 **********************************************************************/
280 static Cache _cache_malloc(int slotCount)
281 {
282 Cache new_cache;
283 size_t size;
284
285 OBJC_CHECK_LOCKED(&cacheUpdateLock);
286
287 // Allocate table (why not check for failure?)
288 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
289 #if defined(OBJC_INSTRUMENTED)
290 // Custom cache allocator can't handle instrumentation.
291 size += sizeof(CacheInstrumentation);
292 new_cache = _calloc_internal(size, 1);
293 new_cache->mask = slotCount - 1;
294 #elif defined(__LP64__)
295 // fixme cache allocator implementation isn't 64-bit clean
296 new_cache = _calloc_internal(size, 1);
297 new_cache->mask = slotCount - 1;
298 #else
299 if (size < CACHE_QUANTUM || UseInternalZone) {
300 new_cache = _calloc_internal(size, 1);
301 new_cache->mask = slotCount - 1;
302 // occupied and buckets and instrumentation are all zero
303 } else {
304 new_cache = cache_allocator_calloc(size);
305 // mask is already set
306 // occupied and buckets and instrumentation are all zero
307 }
308 #endif
309
310 return new_cache;
311 }
312
313
314 /***********************************************************************
315 * _cache_free_block.
316 *
317 * Called from _cache_free() and _cache_collect_free().
318 * block may be a cache or a forward:: entry.
319 * If block is a cache, forward:: entries it points to will NOT be freed.
320 * Cache locks: cacheUpdateLock must be held by the caller.
321 **********************************************************************/
322 static void _cache_free_block(void *block)
323 {
324 OBJC_CHECK_LOCKED(&cacheUpdateLock);
325
326 #if !defined(__LP64__)
327 if (cache_allocator_is_block(block)) {
328 cache_allocator_free(block);
329 } else
330 #endif
331 {
332 free(block);
333 }
334 }
335
336
337 /***********************************************************************
338 * _cache_free.
339 *
340 * Called from _objc_remove_classes_in_image().
341 * forward:: entries in the cache ARE freed.
342 * Cache locks: cacheUpdateLock must NOT be held by the caller.
343 **********************************************************************/
344 __private_extern__ void _cache_free(Cache cache)
345 {
346 int i;
347
348 OBJC_LOCK(&cacheUpdateLock);
349
350 for (i = 0; i < cache->mask + 1; i++) {
351 cache_entry *entry = (cache_entry *)cache->buckets[i];
352 if (entry && entry->imp == &_objc_msgForward) {
353 _cache_free_block(entry);
354 }
355 }
356
357 _cache_free_block(cache);
358
359 OBJC_UNLOCK(&cacheUpdateLock);
360 }
361
362
363 /***********************************************************************
364 * _cache_create.
365 *
366 * Called from _cache_expand().
367 * Cache locks: cacheUpdateLock must be held by the caller.
368 **********************************************************************/
369 static Cache _cache_create(Class cls)
370 {
371 Cache new_cache;
372
373 OBJC_CHECK_LOCKED(&cacheUpdateLock);
374
375 // Allocate new cache block
376 new_cache = _cache_malloc(INIT_CACHE_SIZE);
377
378 // Install the cache
379 _class_setCache(cls, new_cache);
380
381 // Clear the grow flag so that we will re-use the current storage,
382 // rather than actually grow the cache, when expanding the cache
383 // for the first time
384 if (_class_slow_grow) {
385 _class_setGrowCache(cls, NO);
386 }
387
388 // Return our creation
389 return new_cache;
390 }
391
392
393 /***********************************************************************
394 * _cache_expand.
395 *
396 * Called from _cache_fill ()
397 * Cache locks: cacheUpdateLock must be held by the caller.
398 **********************************************************************/
399 static Cache _cache_expand(Class cls)
400 {
401 Cache old_cache;
402 Cache new_cache;
403 unsigned int slotCount;
404 unsigned int index;
405
406 OBJC_CHECK_LOCKED(&cacheUpdateLock);
407
408 // First growth goes from empty cache to a real one
409 old_cache = _class_getCache(cls);
410 if (_cache_isEmpty(old_cache))
411 return _cache_create (cls);
412
413 if (_class_slow_grow) {
414 // Cache grows every other time only.
415 if (_class_shouldGrowCache(cls)) {
416 // Grow the cache this time. Don't grow next time.
417 _class_setGrowCache(cls, NO);
418 }
419 else {
420 // Reuse the current cache storage this time. Do grow next time.
421 _class_setGrowCache(cls, YES);
422
423 // Clear the valid-entry counter
424 old_cache->occupied = 0;
425
426 // Invalidate all the cache entries
427 for (index = 0; index < old_cache->mask + 1; index += 1)
428 {
429 // Remember what this entry was, so we can possibly
430 // deallocate it after the bucket has been invalidated
431 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
432
433 // Skip invalid entry
434 if (!oldEntry)
435 continue;
436
437 // Invalidate this entry
438 old_cache->buckets[index] = NULL;
439
440 // Deallocate "forward::" entry
441 if (oldEntry->imp == &_objc_msgForward) {
442 _cache_collect_free (oldEntry, sizeof(cache_entry), NO);
443 }
444 }
445
446 // Return the same old cache, freshly emptied
447 return old_cache;
448 }
449 }
450
451 // Double the cache size
452 slotCount = (old_cache->mask + 1) << 1;
453
454 new_cache = _cache_malloc(slotCount);
455
456 #ifdef OBJC_INSTRUMENTED
457 // Propagate the instrumentation data
458 {
459 CacheInstrumentation *oldCacheData;
460 CacheInstrumentation *newCacheData;
461
462 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
463 newCacheData = CACHE_INSTRUMENTATION(new_cache);
464 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
465 }
466 #endif
467
468 // Deallocate "forward::" entries from the old cache
469 for (index = 0; index < old_cache->mask + 1; index++) {
470 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
471 if (entry && entry->imp == &_objc_msgForward) {
472 _cache_collect_free (entry, sizeof(cache_entry), NO);
473 }
474 }
475
476 // Install new cache
477 _class_setCache(cls, new_cache);
478
479 // Deallocate old cache, try freeing all the garbage
480 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *), YES);
481 return new_cache;
482 }
483
484
485 /***********************************************************************
486 * _cache_fill. Add the specified method to the specified class' cache.
487 * Returns NO if the cache entry wasn't added: cache was busy,
488 * class is still being initialized, new entry is a duplicate.
489 *
490 * Called only from _class_lookupMethodAndLoadCache and
491 * class_respondsToMethod and _cache_addForwardEntry.
492 *
493 * Cache locks: cacheUpdateLock must not be held.
494 **********************************************************************/
495 __private_extern__ BOOL _cache_fill(Class cls, Method smt, SEL sel)
496 {
497 unsigned int newOccupied;
498 uintptr_t index;
499 cache_entry **buckets;
500 cache_entry *entry;
501 Cache cache;
502
503 OBJC_CHECK_UNLOCKED(&cacheUpdateLock);
504
505 // Never cache before +initialize is done
506 if (!_class_isInitialized(cls)) {
507 return NO;
508 }
509
510 // Keep tally of cache additions
511 totalCacheFills += 1;
512
513 OBJC_LOCK(&cacheUpdateLock);
514
515 entry = (cache_entry *)smt;
516
517 cache = _class_getCache(cls);
518
519 // Make sure the entry wasn't added to the cache by some other thread
520 // before we grabbed the cacheUpdateLock.
521 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
522 // return forward:: entries.
523 if (_cache_getImp(cls, sel)) {
524 OBJC_UNLOCK(&cacheUpdateLock);
525 return NO; // entry is already cached, didn't add new one
526 }
527
528 // Use the cache as-is if it is less than 3/4 full
529 newOccupied = cache->occupied + 1;
530 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
531 // Cache is less than 3/4 full.
532 cache->occupied = newOccupied;
533 } else {
534 // Cache is too full. Expand it.
535 cache = _cache_expand (cls);
536
537 // Account for the addition
538 cache->occupied += 1;
539 }
540
541 // Insert the new entry. This can be done by either:
542 // (a) Scanning for the first unused spot. Easy!
543 // (b) Opening up an unused spot by sliding existing
544 // entries down by one. The benefit of this
545 // extra work is that it puts the most recently
546 // loaded entries closest to where the selector
547 // hash starts the search.
548 //
549 // The loop is a little more complicated because there
550 // are two kinds of entries, so there have to be two ways
551 // to slide them.
552 buckets = (cache_entry **)cache->buckets;
553 index = CACHE_HASH(sel, cache->mask);
554 for (;;)
555 {
556 // Slide existing entries down by one
557 cache_entry *saveEntry;
558
559 // Copy current entry to a local
560 saveEntry = buckets[index];
561
562 // Copy previous entry (or new entry) to current slot
563 buckets[index] = entry;
564
565 // Done if current slot had been invalid
566 if (saveEntry == NULL)
567 break;
568
569 // Prepare to copy saved value into next slot
570 entry = saveEntry;
571
572 // Move on to next slot
573 index += 1;
574 index &= cache->mask;
575 }
576
577 OBJC_UNLOCK(&cacheUpdateLock);
578
579 return YES; // successfully added new cache entry
580 }
581
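/***********************************************************************
 * Worked example of the insertion loop above (bucket contents are
 * hypothetical). Suppose the new selector hashes to index 2 and buckets
 * 2 and 3 already hold entries A and B, with bucket 4 empty:
 *
 *     before:  [ - , - , A , B , - , ... ]
 *
 * The loop writes the new entry N into bucket 2, slides A into 3 and B
 * into 4, and stops at the empty slot:
 *
 *     after:   [ - , - , N , A , B , ... ]
 *
 * so the most recently added entry sits exactly where its hash probe
 * begins, per option (b) in the comment above.
 **********************************************************************/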
582
583 /***********************************************************************
584 * _cache_addForwardEntry
585 * Add a forward:: entry for the given selector to cls's method cache.
586 * Does nothing if the cache addition fails for any reason.
587 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
588 * Cache locks: cacheUpdateLock must not be held.
589 **********************************************************************/
590 __private_extern__ void _cache_addForwardEntry(Class cls, SEL sel)
591 {
592 cache_entry *smt;
593
594 smt = _malloc_internal(sizeof(cache_entry));
595 smt->name = sel;
596 smt->imp = &_objc_msgForward;
597 if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
598 // Entry not added to cache. Don't leak the method struct.
599 _free_internal(smt);
600 }
601 }
602
603
604 /***********************************************************************
605 * _cache_flush. Invalidate all valid entries in the given class' cache.
606 *
607 * Called from flush_caches() and _cache_fill()
608 * Cache locks: cacheUpdateLock must be held by the caller.
609 **********************************************************************/
610 #if __OBJC2__
611 static
612 #else
613 __private_extern__
614 #endif
615 void _cache_flush(Class cls)
616 {
617 Cache cache;
618 unsigned int index;
619
620 OBJC_CHECK_LOCKED(&cacheUpdateLock);
621
622 // Locate cache. Ignore unused cache.
623 cache = _class_getCache(cls);
624 if (_cache_isEmpty(cache)) return;
625
626 #ifdef OBJC_INSTRUMENTED
627 {
628 CacheInstrumentation *cacheData;
629
630 // Tally this flush
631 cacheData = CACHE_INSTRUMENTATION(cache);
632 cacheData->flushCount += 1;
633 cacheData->flushedEntries += cache->occupied;
634 if (cache->occupied > cacheData->maxFlushedEntries)
635 cacheData->maxFlushedEntries = cache->occupied;
636 }
637 #endif
638
639 // Traverse the cache
640 for (index = 0; index <= cache->mask; index += 1)
641 {
642 // Remember what this entry was, so we can possibly
643 // deallocate it after the bucket has been invalidated
644 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
645
646 // Invalidate this entry
647 cache->buckets[index] = NULL;
648
649 // Deallocate "forward::" entry
650 if (oldEntry && oldEntry->imp == &_objc_msgForward)
651 _cache_collect_free (oldEntry, sizeof(cache_entry), NO);
652 }
653
654 // Clear the valid-entry counter
655 cache->occupied = 0;
656 }
657
658
659 /***********************************************************************
660 * flush_cache. Flushes the instance method cache for class cls only.
661 * Use flush_caches() if cls might have in-use subclasses.
662 **********************************************************************/
663 __private_extern__ void flush_cache(Class cls)
664 {
665 if (cls) {
666 OBJC_LOCK(&cacheUpdateLock);
667 _cache_flush(cls);
668 OBJC_UNLOCK(&cacheUpdateLock);
669 }
670 }
671
672
673 /***********************************************************************
674 * cache collection.
675 **********************************************************************/
676
677 // A sentinel (magic value) to report bad thread_get_state status
678 #define PC_SENTINEL 0
679
680 // UNIX03 compliance hack (4508809)
681 #if !__DARWIN_UNIX03
682 #define __srr0 srr0
683 #define __eip eip
684 #endif
685
686 static uintptr_t _get_pc_for_thread(mach_port_t thread)
687 #if defined(__i386__)
688 {
689 i386_thread_state_t state;
690 unsigned int count = i386_THREAD_STATE_COUNT;
691 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
692 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
693 }
694 #elif defined(__ppc__)
695 {
696 ppc_thread_state_t state;
697 unsigned int count = PPC_THREAD_STATE_COUNT;
698 kern_return_t okay = thread_get_state (thread, PPC_THREAD_STATE, (thread_state_t)&state, &count);
699 return (okay == KERN_SUCCESS) ? state.__srr0 : PC_SENTINEL;
700 }
701 #elif defined(__ppc64__)
702 {
703 ppc_thread_state64_t state;
704 unsigned int count = PPC_THREAD_STATE64_COUNT;
705 kern_return_t okay = thread_get_state (thread, PPC_THREAD_STATE64, (thread_state_t)&state, &count);
706 return (okay == KERN_SUCCESS) ? state.__srr0 : PC_SENTINEL;
707 }
708 #elif defined(__x86_64__)
709 {
710 x86_thread_state64_t state;
711 unsigned int count = x86_THREAD_STATE64_COUNT;
712 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
713 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
714 }
715 #else
716 {
717 #error _get_pc_for_thread () not implemented for this architecture
718 }
719 #endif
720
721
722 /***********************************************************************
723 * _collecting_in_critical.
724 * Returns TRUE if some thread is currently executing a cache-reading
725 * function. Collection of cache garbage is not allowed when a cache-
726 * reading function is in progress because it might still be using
727 * the garbage memory.
728 **********************************************************************/
729 OBJC_EXPORT uintptr_t objc_entryPoints[];
730 OBJC_EXPORT uintptr_t objc_exitPoints[];
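/* Layout assumed by the loop below: objc_entryPoints[i] and
 * objc_exitPoints[i] bracket the i-th cache-reading routine (objc_msgSend
 * and the other readers listed at the top of this file), and the entry
 * list is terminated by a 0. A PC inside any such range means that thread
 * may still be dereferencing cache memory, so collection must wait. */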
731
732 static int _collecting_in_critical(void)
733 {
734 thread_act_port_array_t threads;
735 unsigned number;
736 unsigned count;
737 kern_return_t ret;
738 int result;
739
740 mach_port_t mythread = pthread_mach_thread_np(pthread_self());
741
742 // Get a list of all the threads in the current task
743 ret = task_threads (mach_task_self (), &threads, &number);
744 if (ret != KERN_SUCCESS)
745 {
746 _objc_fatal("task_threads failed (result %d)\n", ret);
747 }
748
749 // Check whether any thread is in the cache lookup code
750 result = FALSE;
751 for (count = 0; count < number; count++)
752 {
753 int region;
754 uintptr_t pc;
755
756 // Don't bother checking ourselves
757 if (threads[count] == mythread)
758 continue;
759
760 // Find out where thread is executing
761 pc = _get_pc_for_thread (threads[count]);
762
763 // Check for bad status, and if so, assume the worst (can't collect)
764 if (pc == PC_SENTINEL)
765 {
766 result = TRUE;
767 goto done;
768 }
769
770 // Check whether it is in the cache lookup code
771 for (region = 0; objc_entryPoints[region] != 0; region++)
772 {
773 if ((pc >= objc_entryPoints[region]) &&
774 (pc <= objc_exitPoints[region]))
775 {
776 result = TRUE;
777 goto done;
778 }
779 }
780 }
781
782 done:
783 // Deallocate the port rights for the threads
784 for (count = 0; count < number; count++) {
785 mach_port_deallocate(mach_task_self (), threads[count]);
786 }
787
788 // Deallocate the thread list
789 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
790
791 // Return our finding
792 return result;
793 }
794
795
796 /***********************************************************************
797 * _garbage_make_room. Ensure that there is enough room for at least
798 * one more ref in the garbage.
799 **********************************************************************/
800
801 // amount of memory represented by all refs in the garbage
802 static size_t garbage_byte_size = 0;
803
804 // do not empty the garbage until garbage_byte_size gets at least this big
805 static size_t garbage_threshold = 1024;
806
807 // table of refs to free
808 static void **garbage_refs = 0;
809
810 // current number of refs in garbage_refs
811 static size_t garbage_count = 0;
812
813 // capacity of current garbage_refs
814 static size_t garbage_max = 0;
815
816 // capacity of initial garbage_refs
817 enum {
818 INIT_GARBAGE_COUNT = 128
819 };
820
821 static void _garbage_make_room(void)
822 {
823 static int first = 1;
824 volatile void *tempGarbage;
825
826 // Create the collection table the first time it is needed
827 if (first)
828 {
829 first = 0;
830 garbage_refs = _malloc_internal(INIT_GARBAGE_COUNT * sizeof(void *));
831 garbage_max = INIT_GARBAGE_COUNT;
832 }
833
834 // Double the table if it is full
835 else if (garbage_count == garbage_max)
836 {
837 tempGarbage = _realloc_internal(garbage_refs, garbage_max * 2 * sizeof(void *));
838 garbage_refs = (void **) tempGarbage;
839 garbage_max *= 2;
840 }
841 }
842
843
844 /***********************************************************************
845 * _cache_collect_free. Add the specified malloc'd memory to the list
846 * of them to free at some later point.
847 * size is used for the collection threshold. It does not have to be
848 * precisely the block's size.
849 * Cache locks: cacheUpdateLock must be held by the caller.
850 **********************************************************************/
851 static void _cache_collect_free(void *data, size_t size, BOOL tryCollect)
852 {
853 OBJC_CHECK_LOCKED(&cacheUpdateLock);
854
855 // Insert new element in garbage list
856 // Note that we do this even if we end up free'ing everything
857 _garbage_make_room ();
858 garbage_byte_size += size;
859 garbage_refs[garbage_count++] = data;
860
861 // Done if caller says not to clean up
862 if (!tryCollect) return;
863
864 // Done if the garbage is not full
865 if (garbage_byte_size < garbage_threshold) {
866 if (PrintCacheCollection) {
867 _objc_inform ("CACHE COLLECTION: not collecting; not enough garbage (%zu < %zu)\n", garbage_byte_size, garbage_threshold);
868 }
869 return;
870 }
871
872 // Synchronize garbage collection with objc_msgSend and other cache readers
873 if (!_collecting_in_critical ()) {
874 // No cache readers in progress - garbage is now deletable
875
876 // Log our progress
877 if (PrintCacheCollection) {
878 _objc_inform ("CACHE COLLECTION: collecting %zu bytes\n",
879 garbage_byte_size);
880 }
881
882 // Dispose all refs now in the garbage
883 while (garbage_count--) {
884 _cache_free_block(garbage_refs[garbage_count]);
885 }
886
887 // Clear the garbage count and total size indicator
888 garbage_count = 0;
889 garbage_byte_size = 0;
890 }
891 else {
892 // objc_msgSend (or other cache reader) is currently looking in the
893 // cache and might still be using some garbage.
894 if (PrintCacheCollection) {
895 _objc_inform ("CACHE COLLECTION: not collecting; objc_msgSend in progress\n");
896 }
897 }
898 }
899
900
901
902
903
904 #if !defined(__LP64__)
905
906 /***********************************************************************
907 * Custom method cache allocator.
908 * Method cache block sizes are 2^n + 2 words (for a cache with 2^n slots), which is a pessimal
909 * case for the system allocator. It wastes 504 bytes per cache block
910 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
911 * To save memory, the custom cache allocator below is used.
912 *
913 * The cache allocator uses 128 KB allocation regions. Few processes will
914 * require a second region. Within a region, allocation is address-ordered
915 * first fit.
916 *
917 * The cache allocator uses a quantum of 520.
918 * Cache block ideal sizes: 520, 1032, 2056, 4104
919 * Cache allocator sizes: 520, 1040, 2080, 4160
920 *
921 * Because all blocks are known to be genuine method caches, the ordinary
922 * cache->mask and cache->occupied fields are used as block headers.
923 * No out-of-band headers are maintained. The number of blocks will
924 * almost always be fewer than 200, so for simplicity there is no free
925 * list or other optimization.
926 *
927 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
928 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
929 *
930 * No cache allocator functions take any locks. Instead, the caller
931 * must hold the cacheUpdateLock.
932 *
933 * fixme with 128 KB regions and 520 B min block size, an allocation
934 * bitmap would be only 32 bytes - better than free list?
935 *
936 * fixme not 64-bit clean?
937 **********************************************************************/
938
939 typedef struct cache_allocator_block {
940 unsigned int size;
941 unsigned int state;
942 struct cache_allocator_block *nextFree;
943 } cache_allocator_block;
944
945 typedef struct cache_allocator_region {
946 cache_allocator_block *start;
947 cache_allocator_block *end; // first non-block address
948 cache_allocator_block *freeList;
949 struct cache_allocator_region *next;
950 } cache_allocator_region;
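/* Note on the layout: cache_allocator_block deliberately aliases the start
 * of struct objc_cache, so for a live cache block->size is the cache's mask
 * and block->state is its occupied count (see cache_region_calloc below).
 * A freed block instead stores its exact quantized byte count in size and
 * -1 in state, as described in the header comment above. */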
951
952 static cache_allocator_region *cacheRegion = NULL;
953
954
955 static unsigned int cache_allocator_mask_for_size(size_t size)
956 {
957 return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
958 }
959
960 static size_t cache_allocator_size_for_mask(unsigned int mask)
961 {
962 size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
963 size_t actual = CACHE_QUANTUM;
964 while (actual < requested) actual += CACHE_QUANTUM;
965 return actual;
966 }
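/* Worked example with 32-bit pointers (the only case this allocator is
 * compiled for): a 128-slot cache has mask == 127, so its ideal size is
 * sizeof(struct objc_cache) + TABLE_SIZE(128) == 12 + 127*4 == 520 bytes,
 * exactly one CACHE_QUANTUM. A 256-slot cache needs 12 + 255*4 == 1032
 * bytes, which cache_allocator_size_for_mask rounds up to 1040 (two
 * quanta), matching the ideal/allocator size pairs listed earlier. */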
967
968 /***********************************************************************
969 * cache_allocator_add_region
970 * Allocates and returns a new region that can hold at least size
971 * bytes of large method caches.
972 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
973 * with a minimum of CACHE_REGION_SIZE.
974 * The new region is lowest-priority for new allocations. Callers that
975 * know the other regions are already full should allocate directly
976 * into the returned region.
977 **********************************************************************/
978 static cache_allocator_region *cache_allocator_add_region(size_t size)
979 {
980 vm_address_t addr;
981 cache_allocator_block *b;
982 cache_allocator_region **rgnP;
983 cache_allocator_region *newRegion =
984 _calloc_internal(1, sizeof(cache_allocator_region));
985
986 // Round size up to quantum boundary, and apply the minimum size.
987 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
988 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
989
990 // Allocate the region
991 addr = 0;
992 vm_allocate(mach_task_self(), &addr, size, 1);
993 newRegion->start = (cache_allocator_block *)addr;
994 newRegion->end = (cache_allocator_block *)(addr + size);
995
996 // Mark the first block: free and covers the entire region
997 b = newRegion->start;
998 b->size = size;
999 b->state = (unsigned int)-1;
1000 b->nextFree = NULL;
1001 newRegion->freeList = b;
1002
1003 // Add to end of the linked list of regions.
1004 // Other regions should be re-used before this one is touched.
1005 newRegion->next = NULL;
1006 rgnP = &cacheRegion;
1007 while (*rgnP) {
1008 rgnP = &(**rgnP).next;
1009 }
1010 *rgnP = newRegion;
1011
1012 return newRegion;
1013 }
1014
1015
1016 /***********************************************************************
1017 * cache_allocator_coalesce
1018 * Attempts to coalesce a free block with the single free block following
1019 * it in the free list, if any.
1020 **********************************************************************/
1021 static void cache_allocator_coalesce(cache_allocator_block *block)
1022 {
1023 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1024 block->size += block->nextFree->size;
1025 block->nextFree = block->nextFree->nextFree;
1026 }
1027 }
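/* Example (illustrative addresses): a free 520-byte block at 0x1000 whose
 * nextFree is 0x1208 (== 0x1000 + 520) is physically adjacent to that
 * block, so the two are merged into one 1040-byte free block. */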
1028
1029
1030 /***********************************************************************
1031 * cache_region_calloc
1032 * Attempt to allocate a size-byte block in the given region.
1033 * Allocation is first-fit. The free list is already fully coalesced.
1034 * Returns NULL if there is not enough room in the region for the block.
1035 **********************************************************************/
1036 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1037 {
1038 cache_allocator_block **blockP;
1039 unsigned int mask;
1040
1041 // Save mask for allocated block, then round size
1042 // up to CACHE_QUANTUM boundary
1043 mask = cache_allocator_mask_for_size(size);
1044 size = cache_allocator_size_for_mask(mask);
1045
1046 // Search the free list for a sufficiently large free block.
1047
1048 for (blockP = &rgn->freeList;
1049 *blockP != NULL;
1050 blockP = &(**blockP).nextFree)
1051 {
1052 cache_allocator_block *block = *blockP;
1053 if (block->size < size) continue; // not big enough
1054
1055 // block is now big enough. Allocate from it.
1056
1057 // Slice off unneeded fragment of block, if any,
1058 // and reconnect the free list around block.
1059 if (block->size - size >= CACHE_QUANTUM) {
1060 cache_allocator_block *leftover =
1061 (cache_allocator_block *)(size + (uintptr_t)block);
1062 leftover->size = block->size - size;
1063 leftover->state = (unsigned int)-1;
1064 leftover->nextFree = block->nextFree;
1065 *blockP = leftover;
1066 } else {
1067 *blockP = block->nextFree;
1068 }
1069
1070 // block is now exactly the right size.
1071
1072 bzero(block, size);
1073 block->size = mask; // Cache->mask
1074 block->state = 0; // Cache->occupied
1075
1076 return block;
1077 }
1078
1079 // No room in this region.
1080 return NULL;
1081 }
1082
1083
1084 /***********************************************************************
1085 * cache_allocator_calloc
1086 * Custom allocator for large method caches (128+ slots)
1087 * The returned cache block already has cache->mask set.
1088 * cache->occupied and the cache contents are zero.
1089 * Cache locks: cacheUpdateLock must be held by the caller
1090 **********************************************************************/
1091 static void *cache_allocator_calloc(size_t size)
1092 {
1093 cache_allocator_region *rgn;
1094
1095 OBJC_CHECK_LOCKED(&cacheUpdateLock);
1096
1097 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1098 void *p = cache_region_calloc(rgn, size);
1099 if (p) {
1100 return p;
1101 }
1102 }
1103
1104 // No regions or all regions full - make a region and try one more time
1105 // In the unlikely case of a cache larger than a whole region, it will get its own region.
1106 return cache_region_calloc(cache_allocator_add_region(size), size);
1107 }
1108
1109
1110 /***********************************************************************
1111 * cache_allocator_region_for_block
1112 * Returns the cache allocator region that ptr points into, or NULL.
1113 **********************************************************************/
1114 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1115 {
1116 cache_allocator_region *rgn;
1117 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1118 if (block >= rgn->start && block < rgn->end) return rgn;
1119 }
1120 return NULL;
1121 }
1122
1123
1124 /***********************************************************************
1125 * cache_allocator_is_block
1126 * If ptr is a live block from the cache allocator, return YES
1127 * If ptr is a block from some other allocator, return NO.
1128 * If ptr is a dead block from the cache allocator, result is undefined.
1129 * Cache locks: cacheUpdateLock must be held by the caller
1130 **********************************************************************/
1131 static BOOL cache_allocator_is_block(void *ptr)
1132 {
1133 OBJC_CHECK_LOCKED(&cacheUpdateLock);
1134 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1135 }
1136
1137 /***********************************************************************
1138 * cache_allocator_free
1139 * Frees a block allocated by the cache allocator.
1140 * Cache locks: cacheUpdateLock must be held by the caller.
1141 **********************************************************************/
1142 static void cache_allocator_free(void *ptr)
1143 {
1144 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1145 cache_allocator_block *cur;
1146 cache_allocator_region *rgn;
1147
1148 OBJC_CHECK_LOCKED(&cacheUpdateLock);
1149
1150 if (! (rgn = cache_allocator_region_for_block(ptr))) {
1151 // free of non-pointer
1152 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1153 return;
1154 }
1155
1156 dead->size = cache_allocator_size_for_mask(dead->size);
1157 dead->state = (unsigned int)-1;
1158
1159 if (!rgn->freeList || rgn->freeList > dead) {
1160 // dead block belongs at front of free list
1161 dead->nextFree = rgn->freeList;
1162 rgn->freeList = dead;
1163 cache_allocator_coalesce(dead);
1164 return;
1165 }
1166
1167 // dead block belongs in the middle or end of free list
1168 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1169 cache_allocator_block *ahead = cur->nextFree;
1170
1171 if (!ahead || ahead > dead) {
1172 // cur and ahead straddle dead, OR dead belongs at end of free list
1173 cur->nextFree = dead;
1174 dead->nextFree = ahead;
1175
1176 // coalesce into dead first in case both succeed
1177 cache_allocator_coalesce(dead);
1178 cache_allocator_coalesce(cur);
1179 return;
1180 }
1181 }
1182
1183 // uh-oh
1184 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1185 }
1186
1187 // !defined(__LP64__)
1188 #endif
1189
1190
1191
1192
1193
1194 /***********************************************************************
1195 * Cache instrumentation and debugging
1196 **********************************************************************/
1197
1198 #ifdef OBJC_INSTRUMENTED
1199 enum {
1200 CACHE_HISTOGRAM_SIZE = 512
1201 };
1202
1203 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1204 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1205 #endif
1206
1207
1208 /***********************************************************************
1209 * _cache_print.
1210 **********************************************************************/
1211 static void _cache_print(Cache cache)
1212 {
1213 unsigned int index;
1214 unsigned int count;
1215
1216 count = cache->mask + 1;
1217 for (index = 0; index < count; index += 1) {
1218 cache_entry *entry = (cache_entry *)cache->buckets[index];
1219 if (entry) {
1220 if (entry->imp == &_objc_msgForward)
1221 printf ("does not recognize: \n");
1222 printf ("%s\n", sel_getName(entry->name));
1223 }
1224 }
1225 }
1226
1227
1228 /***********************************************************************
1229 * _class_printMethodCaches.
1230 **********************************************************************/
1231 __private_extern__ void _class_printMethodCaches(Class cls)
1232 {
1233 if (_cache_isEmpty(_class_getCache(cls))) {
1234 printf("no instance-method cache for class %s\n", _class_getName(cls));
1235 } else {
1236 printf("instance-method cache for class %s:\n", _class_getName(cls));
1237 _cache_print(_class_getCache(cls));
1238 }
1239
1240 if (_cache_isEmpty(_class_getCache(((id)cls)->isa))) {
1241 printf("no class-method cache for class %s\n", _class_getName(cls));
1242 } else {
1243 printf ("class-method cache for class %s:\n", _class_getName(cls));
1244 _cache_print(_class_getCache(((id)cls)->isa));
1245 }
1246 }
1247
1248
1249 #if 0
1250 #warning fixme
1251 /***********************************************************************
1252 * log2u.
1253 **********************************************************************/
1254 static unsigned int log2u(unsigned int x)
1255 {
1256 unsigned int log;
1257
1258 log = 0;
1259 while (x >>= 1)
1260 log += 1;
1261
1262 return log;
1263 }
1264
1265
1266 /***********************************************************************
1267 * _class_printDuplicateCacheEntries.
1268 **********************************************************************/
1269 void _class_printDuplicateCacheEntries(BOOL detail)
1270 {
1271 NXHashState state;
1272 Class cls;
1273 unsigned int duplicates;
1274 unsigned int index1;
1275 unsigned int index2;
1276 unsigned int mask;
1277 unsigned int count;
1278 unsigned int isMeta;
1279 Cache cache;
1280
1281
1282 printf ("Checking for duplicate cache entries \n");
1283
1284 // Outermost loop - iterate over all classes
1285 state = NXInitHashState (class_hash);
1286 duplicates = 0;
1287 while (NXNextHashState (class_hash, &state, (void **) &cls))
1288 {
1289 // Control loop - do given class' cache, then its isa's cache
1290 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1291 {
1292 // Select cache of interest and make sure it exists
1293 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1294 if (_cache_isEmpty(cache))
1295 continue;
1296
1297 // Middle loop - check each entry in the given cache
1298 mask = cache->mask;
1299 count = mask + 1;
1300 for (index1 = 0; index1 < count; index1 += 1)
1301 {
1302 // Skip invalid entry
1303 if (!cache->buckets[index1])
1304 continue;
1305
1306 // Inner loop - check that given entry matches no later entry
1307 for (index2 = index1 + 1; index2 < count; index2 += 1)
1308 {
1309 // Skip invalid entry
1310 if (!cache->buckets[index2])
1311 continue;
1312
1313 // Check for duplication by method name comparison
1314 if (strcmp ((char *) cache->buckets[index1]->name,
1315 (char *) cache->buckets[index2]->name) == 0)
1316 {
1317 if (detail)
1318 printf ("%s %s\n", _class_getName(cls), sel_getName(cache->buckets[index1]->name));
1319 duplicates += 1;
1320 break;
1321 }
1322 }
1323 }
1324 }
1325 }
1326
1327 // Log the findings
1328 printf ("duplicates = %d\n", duplicates);
1329 printf ("total cache fills = %d\n", totalCacheFills);
1330 }
1331
1332
1333 /***********************************************************************
1334 * PrintCacheHeader.
1335 **********************************************************************/
1336 static void PrintCacheHeader(void)
1337 {
1338 #ifdef OBJC_INSTRUMENTED
1339 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1340 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1341 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1342 #else
1343 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1344 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1345 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1346 #endif
1347 }
1348
1349
1350 /***********************************************************************
1351 * PrintCacheInfo.
1352 **********************************************************************/
1353 static void PrintCacheInfo(unsigned int cacheSize,
1354 unsigned int cacheCount,
1355 unsigned int slotsUsed,
1356 float avgUsed, unsigned int maxUsed,
1357 float avgSHit, unsigned int maxSHit,
1358 float avgSMiss, unsigned int maxSMiss
1359 #ifdef OBJC_INSTRUMENTED
1360 , unsigned int totDHits,
1361 float avgDHit,
1362 unsigned int maxDHit,
1363 unsigned int totDMisses,
1364 float avgDMiss,
1365 unsigned int maxDMiss,
1366 unsigned int totDFlsh,
1367 float avgDFlsh,
1368 unsigned int maxDFlsh
1369 #endif
1370 )
1371 {
1372 #ifdef OBJC_INSTRUMENTED
1373 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1374 #else
1375 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1376 #endif
1377 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1378 #ifdef OBJC_INSTRUMENTED
1379 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1380 #endif
1381 );
1382
1383 }
1384
1385
1386 #ifdef OBJC_INSTRUMENTED
1387 /***********************************************************************
1388 * PrintCacheHistogram. Show the non-zero entries from the specified
1389 * cache histogram.
1390 **********************************************************************/
1391 static void PrintCacheHistogram(char *title,
1392 unsigned int *firstEntry,
1393 unsigned int entryCount)
1394 {
1395 unsigned int index;
1396 unsigned int *thisEntry;
1397
1398 printf ("%s\n", title);
1399 printf (" Probes Tally\n");
1400 printf (" ------ -----\n");
1401 for (index = 0, thisEntry = firstEntry;
1402 index < entryCount;
1403 index += 1, thisEntry += 1)
1404 {
1405 if (*thisEntry == 0)
1406 continue;
1407
1408 printf (" %6d %5d\n", index, *thisEntry);
1409 }
1410 }
1411 #endif
1412
1413
1414 /***********************************************************************
1415 * _class_printMethodCacheStatistics.
1416 **********************************************************************/
1417
1418 #define MAX_LOG2_SIZE 32
1419 #define MAX_CHAIN_SIZE 100
1420
1421 void _class_printMethodCacheStatistics(void)
1422 {
1423 unsigned int isMeta;
1424 unsigned int index;
1425 NXHashState state;
1426 Class cls;
1427 unsigned int totalChain;
1428 unsigned int totalMissChain;
1429 unsigned int maxChain;
1430 unsigned int maxMissChain;
1431 unsigned int classCount;
1432 unsigned int negativeEntryCount;
1433 unsigned int cacheExpandCount;
1434 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1435 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1436 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1437 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1438 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1439 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1440 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1441 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1442 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1443 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1444 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1445 #ifdef OBJC_INSTRUMENTED
1446 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1447 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1448 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1449 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1450 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1451 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1452 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1453 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1454 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1455 #endif
1456
1457 printf ("Printing cache statistics\n");
1458
1459 // Outermost loop - iterate over all classes
1460 state = NXInitHashState (class_hash);
1461 classCount = 0;
1462 negativeEntryCount = 0;
1463 cacheExpandCount = 0;
1464 while (NXNextHashState (class_hash, &state, (void **) &cls))
1465 {
1466 // Tally classes
1467 classCount += 1;
1468
1469 // Control loop - do given class' cache, then its isa's cache
1470 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1471 {
1472 Cache cache;
1473 unsigned int mask;
1474 unsigned int log2Size;
1475 unsigned int entryCount;
1476
1477 // Select cache of interest
1478 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1479
1480 // Ignore empty cache... should we?
1481 if (_cache_isEmpty(cache))
1482 continue;
1483
1484 // Middle loop - do each entry in the given cache
1485 mask = cache->mask;
1486 entryCount = 0;
1487 totalChain = 0;
1488 totalMissChain = 0;
1489 maxChain = 0;
1490 maxMissChain = 0;
1491 for (index = 0; index < mask + 1; index += 1)
1492 {
1493 cache_entry **buckets;
1494 cache_entry *entry;
1495 unsigned int hash;
1496 unsigned int methodChain;
1497 unsigned int methodMissChain;
1498 unsigned int index2;
1499
1500 // If entry is invalid, the only item of
1501 // interest is that future insert hashes
1502 // to this entry can use it directly.
1503 buckets = (cache_entry **)cache->buckets;
1504 if (!buckets[index])
1505 {
1506 missChainCount[0] += 1;
1507 continue;
1508 }
1509
1510 entry = buckets[index];
1511
1512 // Tally valid entries
1513 entryCount += 1;
1514
1515 // Tally "forward::" entries
1516 if (entry->imp == &_objc_msgForward)
1517 negativeEntryCount += 1;
1518
1519 // Calculate search distance (chain length) for this method
1520 // The chain may wrap around to the beginning of the table.
1521 hash = CACHE_HASH(entry->name, mask);
1522 if (index >= hash) methodChain = index - hash;
1523 else methodChain = (mask+1) + index - hash;
1524
1525 // Tally chains of this length
1526 if (methodChain < MAX_CHAIN_SIZE)
1527 chainCount[methodChain] += 1;
1528
1529 // Keep sum of all chain lengths
1530 totalChain += methodChain;
1531
1532 // Record greatest chain length
1533 if (methodChain > maxChain)
1534 maxChain = methodChain;
1535
1536 // Calculate search distance for miss that hashes here
1537 index2 = index;
1538 while (buckets[index2])
1539 {
1540 index2 += 1;
1541 index2 &= mask;
1542 }
1543 methodMissChain = ((index2 - index) & mask);
1544
1545 // Tally miss chains of this length
1546 if (methodMissChain < MAX_CHAIN_SIZE)
1547 missChainCount[methodMissChain] += 1;
1548
1549 // Keep sum of all miss chain lengths in this class
1550 totalMissChain += methodMissChain;
1551
1552 // Record greatest miss chain length
1553 if (methodMissChain > maxMissChain)
1554 maxMissChain = methodMissChain;
1555 }
1556
1557 // Factor this cache into statistics about caches of the same
1558 // type and size (all caches are a power of two in size)
1559 log2Size = log2u (mask + 1);
1560 cacheCountBySize[isMeta][log2Size] += 1;
1561 totalEntriesBySize[isMeta][log2Size] += entryCount;
1562 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1563 maxEntriesBySize[isMeta][log2Size] = entryCount;
1564 totalChainBySize[isMeta][log2Size] += totalChain;
1565 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1566 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1567 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1568 if (maxChain > maxChainBySize[isMeta][log2Size])
1569 maxChainBySize[isMeta][log2Size] = maxChain;
1570 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1571 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1572 #ifdef OBJC_INSTRUMENTED
1573 {
1574 CacheInstrumentation *cacheData;
1575
1576 cacheData = CACHE_INSTRUMENTATION(cache);
1577 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1578 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1579 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1580 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1581 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1582 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1583 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1584 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1585 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1586 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1587 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1588 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1589 }
1590 #endif
1591 // Caches start with a power of two number of entries, and grow by doubling, so
1592 // we can calculate the number of times this cache has expanded
1593 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1594 }
1595 }
1596
1597 {
1598 unsigned int cacheCountByType[2] = {0};
1599 unsigned int totalCacheCount = 0;
1600 unsigned int totalEntries = 0;
1601 unsigned int maxEntries = 0;
1602 unsigned int totalSlots = 0;
1603 #ifdef OBJC_INSTRUMENTED
1604 unsigned int totalHitCount = 0;
1605 unsigned int totalHitProbes = 0;
1606 unsigned int maxHitProbes = 0;
1607 unsigned int totalMissCount = 0;
1608 unsigned int totalMissProbes = 0;
1609 unsigned int maxMissProbes = 0;
1610 unsigned int totalFlushCount = 0;
1611 unsigned int totalFlushedEntries = 0;
1612 unsigned int maxFlushedEntries = 0;
1613 #endif
1614
1615 totalChain = 0;
1616 maxChain = 0;
1617 totalMissChain = 0;
1618 maxMissChain = 0;
1619
1620 // Sum information over all caches
1621 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1622 {
1623 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1624 {
1625 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1626 totalEntries += totalEntriesBySize[isMeta][index];
1627 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1628 totalChain += totalChainBySize[isMeta][index];
1629 if (maxEntriesBySize[isMeta][index] > maxEntries)
1630 maxEntries = maxEntriesBySize[isMeta][index];
1631 if (maxChainBySize[isMeta][index] > maxChain)
1632 maxChain = maxChainBySize[isMeta][index];
1633 totalMissChain += totalMissChainBySize[isMeta][index];
1634 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1635 maxMissChain = maxMissChainBySize[isMeta][index];
1636 #ifdef OBJC_INSTRUMENTED
1637 totalHitCount += hitCountBySize[isMeta][index];
1638 totalHitProbes += hitProbesBySize[isMeta][index];
1639 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1640 maxHitProbes = maxHitProbesBySize[isMeta][index];
1641 totalMissCount += missCountBySize[isMeta][index];
1642 totalMissProbes += missProbesBySize[isMeta][index];
1643 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1644 maxMissProbes = maxMissProbesBySize[isMeta][index];
1645 totalFlushCount += flushCountBySize[isMeta][index];
1646 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1647 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1648 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1649 #endif
1650 }
1651
1652 totalCacheCount += cacheCountByType[isMeta];
1653 }
1654
1655 // Log our findings
1656 printf ("There are %u classes\n", classCount);
1657
1658 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1659 {
1660 // Number of this type of class
1661 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1662 cacheCountByType[isMeta],
1663 isMeta ? "class" : "instance");
1664
1665 // Print header
1666 PrintCacheHeader ();
1667
1668 // Keep format consistent even if there are no caches of this kind
1669 if (cacheCountByType[isMeta] == 0)
1670 {
1671 printf ("(none)\n");
1672 continue;
1673 }
1674
1675 // Usage information by cache size
1676 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1677 {
1678 unsigned int cacheCount;
1679 unsigned int cacheSlotCount;
1680 unsigned int cacheEntryCount;
1681
1682 // Get number of caches of this type and size
1683 cacheCount = cacheCountBySize[isMeta][index];
1684 if (cacheCount == 0)
1685 continue;
1686
1687 // Get the cache slot count and the total number of valid entries
1688 cacheSlotCount = (1 << index);
1689 cacheEntryCount = totalEntriesBySize[isMeta][index];
1690
1691 // Give the analysis
1692 PrintCacheInfo (cacheSlotCount,
1693 cacheCount,
1694 cacheEntryCount,
1695 (float) cacheEntryCount / (float) cacheCount,
1696 maxEntriesBySize[isMeta][index],
1697 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1698 maxChainBySize[isMeta][index],
1699 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1700 maxMissChainBySize[isMeta][index]
1701 #ifdef OBJC_INSTRUMENTED
1702 , hitCountBySize[isMeta][index],
1703 hitCountBySize[isMeta][index] ?
1704 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1705 maxHitProbesBySize[isMeta][index],
1706 missCountBySize[isMeta][index],
1707 missCountBySize[isMeta][index] ?
1708 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1709 maxMissProbesBySize[isMeta][index],
1710 flushCountBySize[isMeta][index],
1711 flushCountBySize[isMeta][index] ?
1712 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1713 maxFlushedEntriesBySize[isMeta][index]
1714 #endif
1715 );
1716 }
1717 }
1718
1719 // Give overall numbers
1720 printf ("\nCumulative:\n");
1721 PrintCacheHeader ();
1722 PrintCacheInfo (totalSlots,
1723 totalCacheCount,
1724 totalEntries,
1725 (float) totalEntries / (float) totalCacheCount,
1726 maxEntries,
1727 (float) totalChain / (float) totalEntries,
1728 maxChain,
1729 (float) totalMissChain / (float) totalSlots,
1730 maxMissChain
1731 #ifdef OBJC_INSTRUMENTED
1732 , totalHitCount,
1733 totalHitCount ?
1734 (float) totalHitProbes / (float) totalHitCount : 0.0,
1735 maxHitProbes,
1736 totalMissCount,
1737 totalMissCount ?
1738 (float) totalMissProbes / (float) totalMissCount : 0.0,
1739 maxMissProbes,
1740 totalFlushCount,
1741 totalFlushCount ?
1742 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1743 maxFlushedEntries
1744 #endif
1745 );
1746
1747 printf ("\nNumber of \"forward::\" entries: %d\n", negativeEntryCount);
1748 printf ("Number of cache expansions: %d\n", cacheExpandCount);
1749 #ifdef OBJC_INSTRUMENTED
1750 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1751 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1752 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1753 LinearFlushCachesCount,
1754 LinearFlushCachesVisitedCount,
1755 LinearFlushCachesCount ?
1756 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1757 MaxLinearFlushCachesVisitedCount,
1758 LinearFlushCachesVisitedCount,
1759 1.0);
1760 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1761 NonlinearFlushCachesCount,
1762 NonlinearFlushCachesVisitedCount,
1763 NonlinearFlushCachesCount ?
1764 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1765 MaxNonlinearFlushCachesVisitedCount,
1766 NonlinearFlushCachesClassCount,
1767 NonlinearFlushCachesClassCount ?
1768 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1769 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1770 LinearFlushCachesCount + NonlinearFlushCachesCount,
1771 IdealFlushCachesCount,
1772 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1773 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1774 MaxIdealFlushCachesCount,
1775 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1776 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1777 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1778
1779 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1780 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1781 #endif
1782
1783 #if 0
1784 printf ("\nLookup chains:");
1785 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1786 {
1787 if (chainCount[index] != 0)
1788 printf (" %u:%u", index, chainCount[index]);
1789 }
1790
1791 printf ("\nMiss chains:");
1792 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1793 {
1794 if (missChainCount[index] != 0)
1795 printf (" %u:%u", index, missChainCount[index]);
1796 }
1797
1798 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1799 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1800 totalSlots * sizeof(cache_entry *) +
1801 negativeEntryCount * sizeof(cache_entry));
1802 #endif
1803 }
1804 }
1805
1806 #endif