1 /*
2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /***********************************************************************
25 * objc-cache.m
26 * Method cache management
27 * Cache flushing
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
32
33
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
36 *
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
41 *
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
54 *
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
61 *
62 * Cache readers (PC-checked by collecting_in_critical())
63 * objc_msgSend*
64 * _cache_getImp
65 * _cache_getMethod
66 *
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
75 *
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
77 * _cache_print
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
81 *
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward_internal,
90 * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
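/* Illustrative sketch of the collection protocol described above; the real
 * implementation is _cache_collect_free() and _collecting_in_critical() below.
 *
 *   lock cacheUpdateLock
 *   disconnect the old cache from the class's data structures
 *   append the old cache to the garbage list
 *   if enough garbage has accumulated and no thread's PC lies inside
 *       objc_msgSend (or the other cache readers listed above):
 *           free every block on the garbage list
 *   else:
 *       keep the garbage for a later attempt
 *   unlock cacheUpdateLock
 */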
93
94 #include "objc-private.h"
95 #include "hashtable2.h"
96
97 typedef struct {
98 SEL name; // same layout as struct old_method
99 void *unused;
100 IMP imp; // same layout as struct old_method
101 } cache_entry;
102
103 #if __OBJC2__
104
105 #ifndef __LP64__
106 #define CACHE_HASH(sel, mask) (((uintptr_t)(sel)>>2) & (mask))
107 #else
108 #define CACHE_HASH(sel, mask) (((unsigned int)((uintptr_t)(sel)>>3)) & (mask))
109 #endif
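/* Worked example (illustrative): on LP64, a selector at address 0x100004028
 * in a cache whose mask is 7 (8 slots) starts probing at bucket
 * ((0x100004028 >> 3) & 7) == 5. */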
110
111 struct objc_cache {
112 uintptr_t mask; /* total = mask + 1 */
113 uintptr_t occupied;
114 cache_entry *buckets[1];
115 };
116
117 #define CACHE_BUCKET(e) ((cache_entry *)e)
118
119 #else
120
121 /* Most definitions are in runtime.h */
122 #define CACHE_BUCKET(e) ((Method)e)
123
124 #endif
125
126
127 /* When _class_slow_grow is non-zero, any given cache is actually grown
128 * only every other time it becomes full; in between it is simply
129 * emptied and re-used (see _cache_create and _cache_expand). When this
130 * flag is zero, caches are grown every time. */
131 static const int _class_slow_grow = 1;
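/* Illustrative growth sequence for a cache that repeatedly fills up while
 * _class_slow_grow is set (see _cache_expand below):
 *   4 slots -> emptied, still 4 -> 8 -> emptied, still 8 -> 16 -> ... */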
132
133 /* For min cache size: clear_cache=1, slow_grow=1
134 For max cache size: clear_cache=0, slow_grow=0 */
135
136 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
137 enum {
138 INIT_CACHE_SIZE_LOG2 = 2,
139 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
140 };
141
142
143 /* Amount of space required for `count` hash table buckets, knowing that
144 * one entry is embedded in the cache structure itself. */
145 #define TABLE_SIZE(count) (((count) - 1) * sizeof(cache_entry *))
146
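/* Worked example (illustrative): a 4-slot cache needs
 * sizeof(struct objc_cache) + TABLE_SIZE(4) bytes, i.e. the two header
 * words, the one bucket pointer embedded in the struct, and 3 additional
 * bucket pointers, for 4 buckets in total. */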
147
148 #if !TARGET_OS_WIN32
149 # define CACHE_ALLOCATOR
150 #endif
151
152 /* Custom cache allocator parameters.
153 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
154 #define CACHE_ALLOCATOR_MIN 512
155 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
156 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
157 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
158
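/* Evaluated values (illustrative, assuming the 32-bit field sizes used in
 * the allocator comments below): CACHE_QUANTUM = 512 + 12 - 4 = 520 bytes,
 * and CACHE_REGION_SIZE = (131072 / 520) * 520 = 252 * 520 = 131040 bytes.
 * With LP64 pointers the quantum works out to 512 + 24 - 8 = 528. */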
159 static uintptr_t cache_allocator_mask_for_size(size_t size)
160 {
161 return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
162 }
163
164 static size_t cache_allocator_size_for_mask(uintptr_t mask)
165 {
166 size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
167 size_t actual = CACHE_QUANTUM;
168 while (actual < requested) actual += CACHE_QUANTUM;
169 return actual;
170 }
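/* Worked example (illustrative, 32-bit sizes): a 128-slot cache (mask 127)
 * requests sizeof(struct objc_cache) + TABLE_SIZE(128) = 12 + 508 = 520
 * bytes, which cache_allocator_size_for_mask() leaves at exactly one
 * CACHE_QUANTUM; cache_allocator_mask_for_size(520) maps that block size
 * back to mask (520 - 12) / 4 = 127. */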
171
172
173 /* Cache instrumentation data. Immediately follows the cache block itself. */
174 #ifdef OBJC_INSTRUMENTED
175 typedef struct
176 {
177 unsigned int hitCount; // cache lookup success tally
178 unsigned int hitProbes; // sum entries checked to hit
179 unsigned int maxHitProbes; // max entries checked to hit
180 unsigned int missCount; // cache lookup no-find tally
181 unsigned int missProbes; // sum entries checked to miss
182 unsigned int maxMissProbes; // max entries checked to miss
183 unsigned int flushCount; // cache flush tally
184 unsigned int flushedEntries; // sum cache entries flushed
185 unsigned int maxFlushedEntries; // max cache entries flushed
186 } CacheInstrumentation;
187
188 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
189 #endif
190
191 /* Cache filling and flushing instrumentation */
192
193 static int totalCacheFills NOBSS = 0;
194
195 #ifdef OBJC_INSTRUMENTED
196 __private_extern__ unsigned int LinearFlushCachesCount = 0;
197 __private_extern__ unsigned int LinearFlushCachesVisitedCount = 0;
198 __private_extern__ unsigned int MaxLinearFlushCachesVisitedCount = 0;
199 __private_extern__ unsigned int NonlinearFlushCachesCount = 0;
200 __private_extern__ unsigned int NonlinearFlushCachesClassCount = 0;
201 __private_extern__ unsigned int NonlinearFlushCachesVisitedCount = 0;
202 __private_extern__ unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
203 __private_extern__ unsigned int IdealFlushCachesCount = 0;
204 __private_extern__ unsigned int MaxIdealFlushCachesCount = 0;
205 #endif
206
207
208 /***********************************************************************
209 * A static empty cache. All classes initially point at this cache.
210 * When the first message is sent it misses in the cache, and when
211 * the cache is grown it checks for this case and uses malloc rather
212 * than realloc. This avoids the need to check for NULL caches in the
213 * messenger.
214 ***********************************************************************/
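/* (Illustrative note: because _objc_empty_cache has mask == 0 and a single
 * NULL bucket, every lookup hashes to bucket 0, finds NULL, and misses, so
 * the messenger falls through to the slow path without any special
 * NULL-cache check.) */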
215
216 #ifndef OBJC_INSTRUMENTED
217 const struct objc_cache _objc_empty_cache =
218 {
219 0, // mask
220 0, // occupied
221 { NULL } // buckets
222 };
223 #else
224 // OBJC_INSTRUMENTED requires writable data immediately following emptyCache.
225 struct objc_cache _objc_empty_cache =
226 {
227 0, // mask
228 0, // occupied
229 { NULL } // buckets
230 };
231 CacheInstrumentation emptyCacheInstrumentation = {0};
232 #endif
233
234
235 /* Local prototypes */
236
237 static BOOL _cache_isEmpty(Cache cache);
238 static Cache _cache_malloc(uintptr_t slotCount);
239 static Cache _cache_create(Class cls);
240 static Cache _cache_expand(Class cls);
241 #if __OBJC2__
242 static void _cache_flush(Class cls);
243 #endif
244
245 static int _collecting_in_critical(void);
246 static void _garbage_make_room(void);
247 static void _cache_collect_free(void *data, size_t size, BOOL tryCollect);
248
249 #if defined(CACHE_ALLOCATOR)
250 static BOOL cache_allocator_is_block(void *block);
251 static void *cache_allocator_calloc(size_t size);
252 static void cache_allocator_free(void *block);
253 #endif
254
255 /***********************************************************************
256 * Cache statistics for OBJC_PRINT_CACHE_SETUP
257 **********************************************************************/
258 static unsigned int cache_counts[16];
259 static size_t cache_allocations;
260 static size_t cache_collections;
261 static size_t cache_allocator_regions;
262
263 static size_t log2u(size_t x)
264 {
265 unsigned int log;
266
267 log = 0;
268 while (x >>= 1)
269 log += 1;
270
271 return log;
272 }
273
274
275 /***********************************************************************
276 * _cache_isEmpty.
277 * Returns YES if the given cache is some empty cache.
278 * Empty caches should never be allocated on the heap.
279 **********************************************************************/
280 static BOOL _cache_isEmpty(Cache cache)
281 {
282 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
283 }
284
285
286 /***********************************************************************
287 * _cache_malloc.
288 *
289 * Called from _cache_create() and cache_expand()
290 * Cache locks: cacheUpdateLock must be held by the caller.
291 **********************************************************************/
292 static Cache _cache_malloc(uintptr_t slotCount)
293 {
294 Cache new_cache;
295 size_t size;
296
297 mutex_assert_locked(&cacheUpdateLock);
298
299 // Allocate table (why not check for failure?)
300 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
301 #if defined(OBJC_INSTRUMENTED)
302 // Custom cache allocator can't handle instrumentation.
303 size += sizeof(CacheInstrumentation);
304 new_cache = _calloc_internal(size, 1);
305 new_cache->mask = slotCount - 1;
306 #elif !defined(CACHE_ALLOCATOR)
307 // fixme cache allocator implementation isn't 64-bit clean
308 new_cache = _calloc_internal(size, 1);
309 new_cache->mask = slotCount - 1;
310 #else
311 if (size < CACHE_ALLOCATOR_MIN || UseInternalZone) {
312 new_cache = _calloc_internal(size, 1);
313 new_cache->mask = slotCount - 1;
314 // occupied and buckets and instrumentation are all zero
315 } else {
316 new_cache = cache_allocator_calloc(size);
317 // mask is already set
318 // occupied and buckets and instrumentation are all zero
319 }
320 #endif
321
322 if (PrintCaches) {
323 size_t bucket = log2u(slotCount);
324 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
325 cache_counts[bucket]++;
326 }
327 cache_allocations++;
328 }
329
330 return new_cache;
331 }
332
333 /***********************************************************************
334 * _cache_free_block.
335 *
336 * Called from _cache_free() and _cache_collect_free().
337 * block may be a cache or a forward:: entry.
338 * If block is a cache, forward:: entries it points to will NOT be freed.
339 * Cache locks: cacheUpdateLock must be held by the caller.
340 **********************************************************************/
341 static void _cache_free_block(void *block)
342 {
343 mutex_assert_locked(&cacheUpdateLock);
344
345 #if !TARGET_OS_WIN32
346 if (PrintCaches) {
347 Cache cache = (Cache)block;
348 size_t slotCount = cache->mask + 1;
349 if (isPowerOf2(slotCount)) {
350 size_t bucket = log2u(slotCount);
351 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
352 cache_counts[bucket]--;
353 }
354 }
355 }
356 #endif
357
358 #if defined(CACHE_ALLOCATOR)
359 if (cache_allocator_is_block(block)) {
360 cache_allocator_free(block);
361 } else
362 #endif
363 {
364 free(block);
365 }
366 }
367
368
369 /***********************************************************************
370 * _cache_free.
371 *
372 * Called from _objc_remove_classes_in_image().
373 * forward:: entries in the cache ARE freed.
374 * Cache locks: cacheUpdateLock must NOT be held by the caller.
375 **********************************************************************/
376 __private_extern__ void _cache_free(Cache cache)
377 {
378 unsigned int i;
379
380 mutex_lock(&cacheUpdateLock);
381
382 for (i = 0; i < cache->mask + 1; i++) {
383 cache_entry *entry = (cache_entry *)cache->buckets[i];
384 if (entry && entry->imp == &_objc_msgForward_internal) {
385 _cache_free_block(entry);
386 }
387 }
388
389 _cache_free_block(cache);
390
391 mutex_unlock(&cacheUpdateLock);
392 }
393
394
395 /***********************************************************************
396 * _cache_create.
397 *
398 * Called from _cache_expand().
399 * Cache locks: cacheUpdateLock must be held by the caller.
400 **********************************************************************/
401 static Cache _cache_create(Class cls)
402 {
403 Cache new_cache;
404
405 mutex_assert_locked(&cacheUpdateLock);
406
407 // Allocate new cache block
408 new_cache = _cache_malloc(INIT_CACHE_SIZE);
409
410 // Install the cache
411 _class_setCache(cls, new_cache);
412
413 // Clear the grow flag so that we will re-use the current storage,
414 // rather than actually grow the cache, when expanding the cache
415 // for the first time
416 if (_class_slow_grow) {
417 _class_setGrowCache(cls, NO);
418 }
419
420 // Return our creation
421 return new_cache;
422 }
423
424
425 /***********************************************************************
426 * _cache_expand.
427 *
428 * Called from _cache_fill ()
429 * Cache locks: cacheUpdateLock must be held by the caller.
430 **********************************************************************/
431 static Cache _cache_expand(Class cls)
432 {
433 Cache old_cache;
434 Cache new_cache;
435 uintptr_t slotCount;
436 uintptr_t index;
437
438 mutex_assert_locked(&cacheUpdateLock);
439
440 // First growth goes from empty cache to a real one
441 old_cache = _class_getCache(cls);
442 if (_cache_isEmpty(old_cache))
443 return _cache_create (cls);
444
445 if (_class_slow_grow) {
446 // Cache grows every other time only.
447 if (_class_shouldGrowCache(cls)) {
448 // Grow the cache this time. Don't grow next time.
449 _class_setGrowCache(cls, NO);
450 }
451 else {
452 // Reuse the current cache storage this time. Do grow next time.
453 _class_setGrowCache(cls, YES);
454
455 // Clear the valid-entry counter
456 old_cache->occupied = 0;
457
458 // Invalidate all the cache entries
459 for (index = 0; index < old_cache->mask + 1; index += 1)
460 {
461 // Remember what this entry was, so we can possibly
462 // deallocate it after the bucket has been invalidated
463 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
464
465 // Skip invalid entry
466 if (!oldEntry)
467 continue;
468
469 // Invalidate this entry
470 old_cache->buckets[index] = NULL;
471
472 // Deallocate "forward::" entry
473 if (oldEntry->imp == &_objc_msgForward_internal) {
474 _cache_collect_free (oldEntry, sizeof(cache_entry), NO);
475 }
476 }
477
478 // Return the same old cache, freshly emptied
479 return old_cache;
480 }
481 }
482
483 // Double the cache size
484 slotCount = (old_cache->mask + 1) << 1;
485
486 new_cache = _cache_malloc(slotCount);
487
488 #ifdef OBJC_INSTRUMENTED
489 // Propagate the instrumentation data
490 {
491 CacheInstrumentation *oldCacheData;
492 CacheInstrumentation *newCacheData;
493
494 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
495 newCacheData = CACHE_INSTRUMENTATION(new_cache);
496 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
497 }
498 #endif
499
500 // Deallocate "forward::" entries from the old cache
501 for (index = 0; index < old_cache->mask + 1; index++) {
502 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
503 if (entry && entry->imp == &_objc_msgForward_internal) {
504 _cache_collect_free (entry, sizeof(cache_entry), NO);
505 }
506 }
507
508 // Install new cache
509 _class_setCache(cls, new_cache);
510
511 // Deallocate old cache, try freeing all the garbage
512 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *), YES);
513 return new_cache;
514 }
515
516
517 /***********************************************************************
518 * _cache_fill. Add the specified method to the specified class' cache.
519 * Returns NO if the cache entry wasn't added: cache was busy,
520 * class is still being initialized, new entry is a duplicate.
521 *
522 * Called only from _class_lookupMethodAndLoadCache and
523 * class_respondsToMethod and _cache_addForwardEntry.
524 *
525 * Cache locks: cacheUpdateLock must not be held.
526 **********************************************************************/
527 __private_extern__ BOOL _cache_fill(Class cls, Method smt, SEL sel)
528 {
529 uintptr_t newOccupied;
530 uintptr_t index;
531 cache_entry **buckets;
532 cache_entry *entry;
533 Cache cache;
534
535 mutex_assert_unlocked(&cacheUpdateLock);
536
537 // Never cache before +initialize is done
538 if (!_class_isInitialized(cls)) {
539 return NO;
540 }
541
542 // Keep tally of cache additions
543 totalCacheFills += 1;
544
545 mutex_lock(&cacheUpdateLock);
546
547 entry = (cache_entry *)smt;
548
549 cache = _class_getCache(cls);
550
551 // Make sure the entry wasn't added to the cache by some other thread
552 // before we grabbed the cacheUpdateLock.
553 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
554 // return forward:: entries.
555 if (_cache_getImp(cls, sel)) {
556 mutex_unlock(&cacheUpdateLock);
557 return NO; // entry is already cached, didn't add new one
558 }
559
560 // Use the cache as-is if it is less than 3/4 full
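// (Illustrative: with mask == 3, i.e. 4 slots, up to 3 entries fit
// before expansion, since 3*4 <= 4*3 but 4*4 > 4*3.)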
561 newOccupied = cache->occupied + 1;
562 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
563 // Cache is less than 3/4 full.
564 cache->occupied = newOccupied;
565 } else {
566 // Cache is too full. Expand it.
567 cache = _cache_expand (cls);
568
569 // Account for the addition
570 cache->occupied += 1;
571 }
572
573 // Insert the new entry. This can be done by either:
574 // (a) Scanning for the first unused spot. Easy!
575 // (b) Opening up an unused spot by sliding existing
576 // entries down by one. The benefit of this
577 // extra work is that it puts the most recently
578 // loaded entries closest to where the selector
579 // hash starts the search.
580 //
581 // The loop is a little more complicated because there
582 // are two kinds of entries, so there have to be two ways
583 // to slide them.
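// (Illustrative: if the selector hashes to slot 2 and slots 2-3 are
// occupied while slot 4 is empty, the new entry lands in slot 2, the
// old slot-2 entry moves to slot 3, and the old slot-3 entry moves to
// slot 4, where the scan stops.)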
584 buckets = (cache_entry **)cache->buckets;
585 index = CACHE_HASH(sel, cache->mask);
586 for (;;)
587 {
588 // Slide existing entries down by one
589 cache_entry *saveEntry;
590
591 // Copy current entry to a local
592 saveEntry = buckets[index];
593
594 // Copy previous entry (or new entry) to current slot
595 buckets[index] = entry;
596
597 // Done if current slot had been invalid
598 if (saveEntry == NULL)
599 break;
600
601 // Prepare to copy saved value into next slot
602 entry = saveEntry;
603
604 // Move on to next slot
605 index += 1;
606 index &= cache->mask;
607 }
608
609 mutex_unlock(&cacheUpdateLock);
610
611 return YES; // successfully added new cache entry
612 }
613
614
615 /***********************************************************************
616 * _cache_addForwardEntry
617 * Add a forward:: entry for the given selector to cls's method cache.
618 * Does nothing if the cache addition fails for any reason.
619 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
620 * Cache locks: cacheUpdateLock must not be held.
621 **********************************************************************/
622 __private_extern__ void _cache_addForwardEntry(Class cls, SEL sel)
623 {
624 cache_entry *smt;
625
626 smt = _malloc_internal(sizeof(cache_entry));
627 smt->name = sel;
628 smt->imp = &_objc_msgForward_internal;
629 if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
630 // Entry not added to cache. Don't leak the method struct.
631 _free_internal(smt);
632 }
633 }
634
635
636 /***********************************************************************
637 * _cache_flush. Invalidate all valid entries in the given class' cache.
638 *
639 * Called from flush_caches() and _cache_fill()
640 * Cache locks: cacheUpdateLock must be held by the caller.
641 **********************************************************************/
642 #if __OBJC2__
643 static
644 #else
645 __private_extern__
646 #endif
647 void _cache_flush(Class cls)
648 {
649 Cache cache;
650 unsigned int index;
651
652 mutex_assert_locked(&cacheUpdateLock);
653
654 // Locate cache. Ignore unused cache.
655 cache = _class_getCache(cls);
656 if (_cache_isEmpty(cache)) return;
657
658 #ifdef OBJC_INSTRUMENTED
659 {
660 CacheInstrumentation *cacheData;
661
662 // Tally this flush
663 cacheData = CACHE_INSTRUMENTATION(cache);
664 cacheData->flushCount += 1;
665 cacheData->flushedEntries += cache->occupied;
666 if (cache->occupied > cacheData->maxFlushedEntries)
667 cacheData->maxFlushedEntries = cache->occupied;
668 }
669 #endif
670
671 // Traverse the cache
672 for (index = 0; index <= cache->mask; index += 1)
673 {
674 // Remember what this entry was, so we can possibly
675 // deallocate it after the bucket has been invalidated
676 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
677
678 // Invalidate this entry
679 cache->buckets[index] = NULL;
680
681 // Deallocate "forward::" entry
682 if (oldEntry && oldEntry->imp == &_objc_msgForward_internal)
683 _cache_collect_free (oldEntry, sizeof(cache_entry), NO);
684 }
685
686 // Clear the valid-entry counter
687 cache->occupied = 0;
688 }
689
690
691 /***********************************************************************
692 * flush_cache. Flushes the instance method cache for class cls only.
693 * Use flush_caches() if cls might have in-use subclasses.
694 **********************************************************************/
695 __private_extern__ void flush_cache(Class cls)
696 {
697 if (cls) {
698 mutex_lock(&cacheUpdateLock);
699 _cache_flush(cls);
700 mutex_unlock(&cacheUpdateLock);
701 }
702 }
703
704
705 /***********************************************************************
706 * cache collection.
707 **********************************************************************/
708
709 #if !TARGET_OS_WIN32
710
711 // A sentinel (magic value) to report bad thread_get_state status
712 #define PC_SENTINEL 0
713
714 // UNIX03 compliance hack (4508809)
715 #if !__DARWIN_UNIX03
716 #define __srr0 srr0
717 #define __eip eip
718 #endif
719
720 static uintptr_t _get_pc_for_thread(thread_t thread)
721 #if defined(__i386__)
722 {
723 i386_thread_state_t state;
724 unsigned int count = i386_THREAD_STATE_COUNT;
725 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
726 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
727 }
728 #elif defined(__ppc__)
729 {
730 ppc_thread_state_t state;
731 unsigned int count = PPC_THREAD_STATE_COUNT;
732 kern_return_t okay = thread_get_state (thread, PPC_THREAD_STATE, (thread_state_t)&state, &count);
733 return (okay == KERN_SUCCESS) ? state.__srr0 : PC_SENTINEL;
734 }
735 #elif defined(__ppc64__)
736 {
737 ppc_thread_state64_t state;
738 unsigned int count = PPC_THREAD_STATE64_COUNT;
739 kern_return_t okay = thread_get_state (thread, PPC_THREAD_STATE64, (thread_state_t)&state, &count);
740 return (okay == KERN_SUCCESS) ? state.__srr0 : PC_SENTINEL;
741 }
742 #elif defined(__x86_64__)
743 {
744 x86_thread_state64_t state;
745 unsigned int count = x86_THREAD_STATE64_COUNT;
746 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
747 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
748 }
749 #elif defined(__arm__)
750 {
751 arm_thread_state_t state;
752 unsigned int count = ARM_THREAD_STATE_COUNT;
753 kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
754 return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
755 }
756 #else
757 {
758 #error _get_pc_for_thread () not implemented for this architecture
759 }
760 #endif
761
762 #endif
763
764 /***********************************************************************
765 * _collecting_in_critical.
766 * Returns TRUE if some thread is currently executing a cache-reading
767 * function. Collection of cache garbage is not allowed when a cache-
768 * reading function is in progress because it might still be using
769 * the garbage memory.
770 **********************************************************************/
771 OBJC_EXPORT uintptr_t objc_entryPoints[];
772 OBJC_EXPORT uintptr_t objc_exitPoints[];
773
774 static int _collecting_in_critical(void)
775 {
776 #if TARGET_OS_WIN32
777 return TRUE;
778 #else
779 thread_act_port_array_t threads;
780 unsigned number;
781 unsigned count;
782 kern_return_t ret;
783 int result;
784
785 mach_port_t mythread = pthread_mach_thread_np(pthread_self());
786
787 // Get a list of all the threads in the current task
788 ret = task_threads (mach_task_self (), &threads, &number);
789 if (ret != KERN_SUCCESS)
790 {
791 _objc_fatal("task_threads failed (result %d)\n", ret);
792 }
793
794 // Check whether any thread is in the cache lookup code
795 result = FALSE;
796 for (count = 0; count < number; count++)
797 {
798 int region;
799 uintptr_t pc;
800
801 // Don't bother checking ourselves
802 if (threads[count] == mythread)
803 continue;
804
805 // Find out where thread is executing
806 pc = _get_pc_for_thread (threads[count]);
807
808 // Check for bad status, and if so, assume the worst (can't collect)
809 if (pc == PC_SENTINEL)
810 {
811 result = TRUE;
812 goto done;
813 }
814
815 // Check whether it is in the cache lookup code
816 for (region = 0; objc_entryPoints[region] != 0; region++)
817 {
818 if ((pc >= objc_entryPoints[region]) &&
819 (pc <= objc_exitPoints[region]))
820 {
821 result = TRUE;
822 goto done;
823 }
824 }
825 }
826
827 done:
828 // Deallocate the port rights for the threads
829 for (count = 0; count < number; count++) {
830 mach_port_deallocate(mach_task_self (), threads[count]);
831 }
832
833 // Deallocate the thread list
834 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
835
836 // Return our finding
837 return result;
838 #endif
839 }
840
841
842 /***********************************************************************
843 * _garbage_make_room. Ensure that there is enough room for at least
844 * one more ref in the garbage.
845 **********************************************************************/
846
847 // amount of memory represented by all refs in the garbage
848 static size_t garbage_byte_size = 0;
849
850 // do not empty the garbage until garbage_byte_size gets at least this big
851 static size_t garbage_threshold = 1024;
852
853 // table of refs to free
854 static void **garbage_refs = 0;
855
856 // current number of refs in garbage_refs
857 static size_t garbage_count = 0;
858
859 // capacity of current garbage_refs
860 static size_t garbage_max = 0;
861
862 // capacity of initial garbage_refs
863 enum {
864 INIT_GARBAGE_COUNT = 128
865 };
866
867 static void _garbage_make_room(void)
868 {
869 static int first = 1;
870 volatile void *tempGarbage;
871
872 // Create the collection table the first time it is needed
873 if (first)
874 {
875 first = 0;
876 garbage_refs = _malloc_internal(INIT_GARBAGE_COUNT * sizeof(void *));
877 garbage_max = INIT_GARBAGE_COUNT;
878 }
879
880 // Double the table if it is full
881 else if (garbage_count == garbage_max)
882 {
883 tempGarbage = _realloc_internal(garbage_refs, garbage_max * 2 * sizeof(void *));
884 garbage_refs = (void **) tempGarbage;
885 garbage_max *= 2;
886 }
887 }
888
889
890 /***********************************************************************
891 * _cache_collect_free. Add the specified malloc'd memory to the garbage
892 * list, to be freed at some later point.
893 * size is used for the collection threshold. It does not have to be
894 * precisely the block's size.
895 * Cache locks: cacheUpdateLock must be held by the caller.
896 **********************************************************************/
897 static void _cache_collect_free(void *data, size_t size, BOOL tryCollect)
898 {
899 mutex_assert_locked(&cacheUpdateLock);
900
901 // Insert new element in garbage list
902 // Note that we do this even if we end up free'ing everything
903 _garbage_make_room ();
904 garbage_byte_size += size;
905 garbage_refs[garbage_count++] = data;
906
907 // Done if caller says not to clean up
908 if (!tryCollect) return;
909
910 // Done if the garbage is not full
911 if (garbage_byte_size < garbage_threshold) {
912 // if (PrintCaches) {
913 // _objc_inform ("CACHES: not collecting; not enough garbage (%zu < %zu)", garbage_byte_size, garbage_threshold);
914 // }
915 return;
916 }
917
918 // Synchronize garbage collection with objc_msgSend and other cache readers
919 if (!_collecting_in_critical ()) {
920 // No cache readers in progress - garbage is now deletable
921
922 // Log our progress
923 if (PrintCaches) {
924 cache_collections++;
925 _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
926 }
927
928 // Dispose all refs now in the garbage
929 while (garbage_count--) {
930 _cache_free_block(garbage_refs[garbage_count]);
931 }
932
933 // Clear the garbage count and total size indicator
934 garbage_count = 0;
935 garbage_byte_size = 0;
936 }
937 else {
938 // objc_msgSend (or other cache reader) is currently looking in the
939 // cache and might still be using some garbage.
940 if (PrintCaches) {
941 _objc_inform ("CACHES: not collecting; objc_msgSend in progress");
942 }
943 }
944
945 if (PrintCaches) {
946 int i;
947 size_t total = 0;
948 size_t ideal_total = 0;
949 size_t malloc_total = 0;
950 size_t local_total = 0;
951
952 for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
953 int count = cache_counts[i];
954 int slots = 1 << i;
955 size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
956 size_t ideal = size;
957 #if TARGET_OS_WIN32
958 size_t malloc = size;
959 #else
960 size_t malloc = malloc_good_size(size);
961 #endif
962 size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
963
964 if (!count) continue;
965
966 _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
967
968 total += count;
969 ideal_total += ideal*count;
970 malloc_total += malloc*count;
971 local_total += local*count;
972 }
973
974 _objc_inform("CACHES: total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
975 }
976 }
977
978
979
980
981
982 #if defined(CACHE_ALLOCATOR)
983
984 /***********************************************************************
985 * Custom method cache allocator.
986 * Method cache block sizes are 2^n + 2 words (for a 2^n-slot cache), which is a pessimal
987 * case for the system allocator. It wastes 504 bytes per cache block
988 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
989 * To save memory, the custom cache allocator below is used.
990 *
991 * The cache allocator uses 128 KB allocation regions. Few processes will
992 * require a second region. Within a region, allocation is address-ordered
993 * first fit.
994 *
995 * The cache allocator uses a quantum of 520.
996 * Cache block ideal sizes: 520, 1032, 2056, 4104
997 * Cache allocator sizes: 520, 1040, 2080, 4160
998 *
999 * Because all blocks are known to be genuine method caches, the ordinary
1000 * cache->mask and cache->occupied fields are used as block headers.
1001 * No out-of-band headers are maintained. The number of blocks will
1002 * almost always be fewer than 200, so a simple address-ordered free
1003 * list per region suffices, with no further optimization.
1004 *
1005 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
1006 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
1007 *
1008 * No cache allocator functions take any locks. Instead, the caller
1009 * must hold the cacheUpdateLock.
1010 *
1011 * fixme with 128 KB regions and 520 B min block size, an allocation
1012 * bitmap would be only 32 bytes - better than free list?
1013 **********************************************************************/
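/* Worked example (illustrative, 32-bit sizes): a 256-slot cache ideally
 * needs 12 + 255*4 = 1032 bytes and is carved out of a region as a
 * 1040-byte (two-quantum) block. While the block is in use its first two
 * words hold the live cache's mask (255) and occupied count; when it is
 * freed, cache_allocator_free() rewrites them to the block size (1040)
 * and -1. */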
1014
1015 typedef struct cache_allocator_block {
1016 uintptr_t size;
1017 uintptr_t state;
1018 struct cache_allocator_block *nextFree;
1019 } cache_allocator_block;
1020
1021 typedef struct cache_allocator_region {
1022 cache_allocator_block *start;
1023 cache_allocator_block *end; // first non-block address
1024 cache_allocator_block *freeList;
1025 struct cache_allocator_region *next;
1026 } cache_allocator_region;
1027
1028 static cache_allocator_region *cacheRegion = NULL;
1029
1030
1031 /***********************************************************************
1032 * cache_allocator_add_region
1033 * Allocates and returns a new region that can hold at least size
1034 * bytes of large method caches.
1035 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
1036 * with a minimum of CACHE_REGION_SIZE.
1037 * The new region is lowest-priority for new allocations. Callers that
1038 * know the other regions are already full should allocate directly
1039 * into the returned region.
1040 **********************************************************************/
1041 static cache_allocator_region *cache_allocator_add_region(size_t size)
1042 {
1043 vm_address_t addr;
1044 cache_allocator_block *b;
1045 cache_allocator_region **rgnP;
1046 cache_allocator_region *newRegion =
1047 _calloc_internal(1, sizeof(cache_allocator_region));
1048
1049 // Round size up to quantum boundary, and apply the minimum size.
1050 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
1051 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
1052
1053 // Allocate the region
1054 addr = 0;
1055 vm_allocate(mach_task_self(), &addr, size, 1);
1056 newRegion->start = (cache_allocator_block *)addr;
1057 newRegion->end = (cache_allocator_block *)(addr + size);
1058
1059 // Mark the first block: free and covers the entire region
1060 b = newRegion->start;
1061 b->size = size;
1062 b->state = (uintptr_t)-1;
1063 b->nextFree = NULL;
1064 newRegion->freeList = b;
1065
1066 // Add to end of the linked list of regions.
1067 // Other regions should be re-used before this one is touched.
1068 newRegion->next = NULL;
1069 rgnP = &cacheRegion;
1070 while (*rgnP) {
1071 rgnP = &(**rgnP).next;
1072 }
1073 *rgnP = newRegion;
1074
1075 cache_allocator_regions++;
1076
1077 return newRegion;
1078 }
1079
1080
1081 /***********************************************************************
1082 * cache_allocator_coalesce
1083 * Attempts to coalesce a free block with the single free block following
1084 * it in the free list, if any.
1085 **********************************************************************/
1086 static void cache_allocator_coalesce(cache_allocator_block *block)
1087 {
1088 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1089 block->size += block->nextFree->size;
1090 block->nextFree = block->nextFree->nextFree;
1091 }
1092 }
1093
1094
1095 /***********************************************************************
1096 * cache_region_calloc
1097 * Attempt to allocate a size-byte block in the given region.
1098 * Allocation is first-fit. The free list is already fully coalesced.
1099 * Returns NULL if there is not enough room in the region for the block.
1100 **********************************************************************/
1101 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1102 {
1103 cache_allocator_block **blockP;
1104 uintptr_t mask;
1105
1106 // Save mask for allocated block, then round size
1107 // up to CACHE_QUANTUM boundary
1108 mask = cache_allocator_mask_for_size(size);
1109 size = cache_allocator_size_for_mask(mask);
1110
1111 // Search the free list for a sufficiently large free block.
1112
1113 for (blockP = &rgn->freeList;
1114 *blockP != NULL;
1115 blockP = &(**blockP).nextFree)
1116 {
1117 cache_allocator_block *block = *blockP;
1118 if (block->size < size) continue; // not big enough
1119
1120 // block is now big enough. Allocate from it.
1121
1122 // Slice off unneeded fragment of block, if any,
1123 // and reconnect the free list around block.
1124 if (block->size - size >= CACHE_QUANTUM) {
1125 cache_allocator_block *leftover =
1126 (cache_allocator_block *)(size + (uintptr_t)block);
1127 leftover->size = block->size - size;
1128 leftover->state = (uintptr_t)-1;
1129 leftover->nextFree = block->nextFree;
1130 *blockP = leftover;
1131 } else {
1132 *blockP = block->nextFree;
1133 }
1134
1135 // block is now exactly the right size.
1136
1137 bzero(block, size);
1138 block->size = mask; // Cache->mask
1139 block->state = 0; // Cache->occupied
1140
1141 return block;
1142 }
1143
1144 // No room in this region.
1145 return NULL;
1146 }
1147
1148
1149 /***********************************************************************
1150 * cache_allocator_calloc
1151 * Custom allocator for large method caches (128+ slots)
1152 * The returned cache block already has cache->mask set.
1153 * cache->occupied and the cache contents are zero.
1154 * Cache locks: cacheUpdateLock must be held by the caller
1155 **********************************************************************/
1156 static void *cache_allocator_calloc(size_t size)
1157 {
1158 cache_allocator_region *rgn;
1159
1160 mutex_assert_locked(&cacheUpdateLock);
1161
1162 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1163 void *p = cache_region_calloc(rgn, size);
1164 if (p) {
1165 return p;
1166 }
1167 }
1168
1169 // No regions or all regions full - make a region and try one more time
1170 // In the unlikely case of a cache over 256KB, it will get its own region.
1171 return cache_region_calloc(cache_allocator_add_region(size), size);
1172 }
1173
1174
1175 /***********************************************************************
1176 * cache_allocator_region_for_block
1177 * Returns the cache allocator region that ptr points into, or NULL.
1178 **********************************************************************/
1179 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1180 {
1181 cache_allocator_region *rgn;
1182 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1183 if (block >= rgn->start && block < rgn->end) return rgn;
1184 }
1185 return NULL;
1186 }
1187
1188
1189 /***********************************************************************
1190 * cache_allocator_is_block
1191 * If ptr is a live block from the cache allocator, return YES
1192 * If ptr is a block from some other allocator, return NO.
1193 * If ptr is a dead block from the cache allocator, result is undefined.
1194 * Cache locks: cacheUpdateLock must be held by the caller
1195 **********************************************************************/
1196 static BOOL cache_allocator_is_block(void *ptr)
1197 {
1198 mutex_assert_locked(&cacheUpdateLock);
1199 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1200 }
1201
1202 /***********************************************************************
1203 * cache_allocator_free
1204 * Frees a block allocated by the cache allocator.
1205 * Cache locks: cacheUpdateLock must be held by the caller.
1206 **********************************************************************/
1207 static void cache_allocator_free(void *ptr)
1208 {
1209 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1210 cache_allocator_block *cur;
1211 cache_allocator_region *rgn;
1212
1213 mutex_assert_locked(&cacheUpdateLock);
1214
1215 if (! (rgn = cache_allocator_region_for_block(ptr))) {
1216 // free of non-pointer
1217 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1218 return;
1219 }
1220
1221 dead->size = cache_allocator_size_for_mask(dead->size);
1222 dead->state = (uintptr_t)-1;
1223
1224 if (!rgn->freeList || rgn->freeList > dead) {
1225 // dead block belongs at front of free list
1226 dead->nextFree = rgn->freeList;
1227 rgn->freeList = dead;
1228 cache_allocator_coalesce(dead);
1229 return;
1230 }
1231
1232 // dead block belongs in the middle or end of free list
1233 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1234 cache_allocator_block *ahead = cur->nextFree;
1235
1236 if (!ahead || ahead > dead) {
1237 // cur and ahead straddle dead, OR dead belongs at end of free list
1238 cur->nextFree = dead;
1239 dead->nextFree = ahead;
1240
1241 // coalesce into dead first in case both succeed
1242 cache_allocator_coalesce(dead);
1243 cache_allocator_coalesce(cur);
1244 return;
1245 }
1246 }
1247
1248 // uh-oh
1249 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1250 }
1251
1252 // defined(CACHE_ALLOCATOR)
1253 #endif
1254
1255 /***********************************************************************
1256 * Cache instrumentation and debugging
1257 **********************************************************************/
1258
1259 #ifdef OBJC_INSTRUMENTED
1260 enum {
1261 CACHE_HISTOGRAM_SIZE = 512
1262 };
1263
1264 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1265 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1266 #endif
1267
1268
1269 /***********************************************************************
1270 * _cache_print.
1271 **********************************************************************/
1272 static void _cache_print(Cache cache)
1273 {
1274 uintptr_t index;
1275 uintptr_t count;
1276
1277 count = cache->mask + 1;
1278 for (index = 0; index < count; index += 1) {
1279 cache_entry *entry = (cache_entry *)cache->buckets[index];
1280 if (entry) {
1281 if (entry->imp == &_objc_msgForward_internal)
1282 printf ("does not recognize: \n");
1283 printf ("%s\n", sel_getName(entry->name));
1284 }
1285 }
1286 }
1287
1288
1289 /***********************************************************************
1290 * _class_printMethodCaches.
1291 **********************************************************************/
1292 __private_extern__ void _class_printMethodCaches(Class cls)
1293 {
1294 if (_cache_isEmpty(_class_getCache(cls))) {
1295 printf("no instance-method cache for class %s\n", _class_getName(cls));
1296 } else {
1297 printf("instance-method cache for class %s:\n", _class_getName(cls));
1298 _cache_print(_class_getCache(cls));
1299 }
1300
1301 if (_cache_isEmpty(_class_getCache(((id)cls)->isa))) {
1302 printf("no class-method cache for class %s\n", _class_getName(cls));
1303 } else {
1304 printf ("class-method cache for class %s:\n", _class_getName(cls));
1305 _cache_print(_class_getCache(((id)cls)->isa));
1306 }
1307 }
1308
1309
1310 #if 0
1311 #warning fixme
1312
1313
1314 /***********************************************************************
1315 * _class_printDuplicateCacheEntries.
1316 **********************************************************************/
1317 void _class_printDuplicateCacheEntries(BOOL detail)
1318 {
1319 NXHashState state;
1320 Class cls;
1321 unsigned int duplicates;
1322 unsigned int index1;
1323 unsigned int index2;
1324 unsigned int mask;
1325 unsigned int count;
1326 unsigned int isMeta;
1327 Cache cache;
1328
1329
1330 printf ("Checking for duplicate cache entries \n");
1331
1332 // Outermost loop - iterate over all classes
1333 state = NXInitHashState (class_hash);
1334 duplicates = 0;
1335 while (NXNextHashState (class_hash, &state, (void **) &cls))
1336 {
1337 // Control loop - do given class' cache, then its isa's cache
1338 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1339 {
1340 // Select cache of interest and make sure it exists
1341 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1342 if (_cache_isEmpty(cache))
1343 continue;
1344
1345 // Middle loop - check each entry in the given cache
1346 mask = cache->mask;
1347 count = mask + 1;
1348 for (index1 = 0; index1 < count; index1 += 1)
1349 {
1350 // Skip invalid entry
1351 if (!cache->buckets[index1])
1352 continue;
1353
1354 // Inner loop - check that given entry matches no later entry
1355 for (index2 = index1 + 1; index2 < count; index2 += 1)
1356 {
1357 // Skip invalid entry
1358 if (!cache->buckets[index2])
1359 continue;
1360
1361 // Check for duplication by method name comparison
1362 if (strcmp ((char *) cache->buckets[index1]->name,
1363 (char *) cache->buckets[index2]->name) == 0)
1364 {
1365 if (detail)
1366 printf ("%s %s\n", _class_getName(cls), sel_getName(cache->buckets[index1]->name));
1367 duplicates += 1;
1368 break;
1369 }
1370 }
1371 }
1372 }
1373 }
1374
1375 // Log the findings
1376 printf ("duplicates = %d\n", duplicates);
1377 printf ("total cache fills = %d\n", totalCacheFills);
1378 }
1379
1380
1381 /***********************************************************************
1382 * PrintCacheHeader.
1383 **********************************************************************/
1384 static void PrintCacheHeader(void)
1385 {
1386 #ifdef OBJC_INSTRUMENTED
1387 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1388 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1389 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1390 #else
1391 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1392 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1393 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1394 #endif
1395 }
1396
1397
1398 /***********************************************************************
1399 * PrintCacheInfo.
1400 **********************************************************************/
1401 static void PrintCacheInfo(unsigned int cacheSize,
1402 unsigned int cacheCount,
1403 unsigned int slotsUsed,
1404 float avgUsed, unsigned int maxUsed,
1405 float avgSHit, unsigned int maxSHit,
1406 float avgSMiss, unsigned int maxSMiss
1407 #ifdef OBJC_INSTRUMENTED
1408 , unsigned int totDHits,
1409 float avgDHit,
1410 unsigned int maxDHit,
1411 unsigned int totDMisses,
1412 float avgDMiss,
1413 unsigned int maxDMiss,
1414 unsigned int totDFlsh,
1415 float avgDFlsh,
1416 unsigned int maxDFlsh
1417 #endif
1418 )
1419 {
1420 #ifdef OBJC_INSTRUMENTED
1421 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1422 #else
1423 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1424 #endif
1425 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1426 #ifdef OBJC_INSTRUMENTED
1427 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1428 #endif
1429 );
1430
1431 }
1432
1433
1434 #ifdef OBJC_INSTRUMENTED
1435 /***********************************************************************
1436 * PrintCacheHistogram. Show the non-zero entries from the specified
1437 * cache histogram.
1438 **********************************************************************/
1439 static void PrintCacheHistogram(char *title,
1440 unsigned int *firstEntry,
1441 unsigned int entryCount)
1442 {
1443 unsigned int index;
1444 unsigned int *thisEntry;
1445
1446 printf ("%s\n", title);
1447 printf (" Probes Tally\n");
1448 printf (" ------ -----\n");
1449 for (index = 0, thisEntry = firstEntry;
1450 index < entryCount;
1451 index += 1, thisEntry += 1)
1452 {
1453 if (*thisEntry == 0)
1454 continue;
1455
1456 printf (" %6d %5d\n", index, *thisEntry);
1457 }
1458 }
1459 #endif
1460
1461
1462 /***********************************************************************
1463 * _class_printMethodCacheStatistics.
1464 **********************************************************************/
1465
1466 #define MAX_LOG2_SIZE 32
1467 #define MAX_CHAIN_SIZE 100
1468
1469 void _class_printMethodCacheStatistics(void)
1470 {
1471 unsigned int isMeta;
1472 unsigned int index;
1473 NXHashState state;
1474 Class cls;
1475 unsigned int totalChain;
1476 unsigned int totalMissChain;
1477 unsigned int maxChain;
1478 unsigned int maxMissChain;
1479 unsigned int classCount;
1480 unsigned int negativeEntryCount;
1481 unsigned int cacheExpandCount;
1482 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1483 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1484 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1485 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1486 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1487 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1488 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1489 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1490 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1491 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1492 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1493 #ifdef OBJC_INSTRUMENTED
1494 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1495 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1496 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1497 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1498 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1499 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1500 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1501 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1502 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1503 #endif
1504
1505 printf ("Printing cache statistics\n");
1506
1507 // Outermost loop - iterate over all classes
1508 state = NXInitHashState (class_hash);
1509 classCount = 0;
1510 negativeEntryCount = 0;
1511 cacheExpandCount = 0;
1512 while (NXNextHashState (class_hash, &state, (void **) &cls))
1513 {
1514 // Tally classes
1515 classCount += 1;
1516
1517 // Control loop - do given class' cache, then its isa's cache
1518 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1519 {
1520 Cache cache;
1521 unsigned int mask;
1522 unsigned int log2Size;
1523 unsigned int entryCount;
1524
1525 // Select cache of interest
1526 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1527
1528 // Ignore empty cache... should we?
1529 if (_cache_isEmpty(cache))
1530 continue;
1531
1532 // Middle loop - do each entry in the given cache
1533 mask = cache->mask;
1534 entryCount = 0;
1535 totalChain = 0;
1536 totalMissChain = 0;
1537 maxChain = 0;
1538 maxMissChain = 0;
1539 for (index = 0; index < mask + 1; index += 1)
1540 {
1541 cache_entry **buckets;
1542 cache_entry *entry;
1543 unsigned int hash;
1544 unsigned int methodChain;
1545 unsigned int methodMissChain;
1546 unsigned int index2;
1547
1548 // If entry is invalid, the only item of
1549 // interest is that future insert hashes
1550 // to this entry can use it directly.
1551 buckets = (cache_entry **)cache->buckets;
1552 if (!buckets[index])
1553 {
1554 missChainCount[0] += 1;
1555 continue;
1556 }
1557
1558 entry = buckets[index];
1559
1560 // Tally valid entries
1561 entryCount += 1;
1562
1563 // Tally "forward::" entries
1564 if (entry->imp == &_objc_msgForward_internal)
1565 negativeEntryCount += 1;
1566
1567 // Calculate search distance (chain length) for this method
1568 // The chain may wrap around to the beginning of the table.
1569 hash = CACHE_HASH(entry->name, mask);
1570 if (index >= hash) methodChain = index - hash;
1571 else methodChain = (mask+1) + index - hash;
1572
1573 // Tally chains of this length
1574 if (methodChain < MAX_CHAIN_SIZE)
1575 chainCount[methodChain] += 1;
1576
1577 // Keep sum of all chain lengths
1578 totalChain += methodChain;
1579
1580 // Record greatest chain length
1581 if (methodChain > maxChain)
1582 maxChain = methodChain;
1583
1584 // Calculate search distance for miss that hashes here
1585 index2 = index;
1586 while (buckets[index2])
1587 {
1588 index2 += 1;
1589 index2 &= mask;
1590 }
1591 methodMissChain = ((index2 - index) & mask);
1592
1593 // Tally miss chains of this length
1594 if (methodMissChain < MAX_CHAIN_SIZE)
1595 missChainCount[methodMissChain] += 1;
1596
1597 // Keep sum of all miss chain lengths in this class
1598 totalMissChain += methodMissChain;
1599
1600 // Record greatest miss chain length
1601 if (methodMissChain > maxMissChain)
1602 maxMissChain = methodMissChain;
1603 }
1604
1605 // Factor this cache into statistics about caches of the same
1606 // type and size (all caches are a power of two in size)
1607 log2Size = log2u (mask + 1);
1608 cacheCountBySize[isMeta][log2Size] += 1;
1609 totalEntriesBySize[isMeta][log2Size] += entryCount;
1610 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1611 maxEntriesBySize[isMeta][log2Size] = entryCount;
1612 totalChainBySize[isMeta][log2Size] += totalChain;
1613 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1614 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1615 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1616 if (maxChain > maxChainBySize[isMeta][log2Size])
1617 maxChainBySize[isMeta][log2Size] = maxChain;
1618 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1619 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1620 #ifdef OBJC_INSTRUMENTED
1621 {
1622 CacheInstrumentation *cacheData;
1623
1624 cacheData = CACHE_INSTRUMENTATION(cache);
1625 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1626 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1627 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1628 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1629 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1630 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1631 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1632 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1633 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1634 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1635 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1636 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1637 }
1638 #endif
1639 // Caches start with a power of two number of entries, and grow by doubling, so
1640 // we can calculate the number of times this cache has expanded
1641 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1642 }
1643 }
1644
1645 {
1646 unsigned int cacheCountByType[2] = {0};
1647 unsigned int totalCacheCount = 0;
1648 unsigned int totalEntries = 0;
1649 unsigned int maxEntries = 0;
1650 unsigned int totalSlots = 0;
1651 #ifdef OBJC_INSTRUMENTED
1652 unsigned int totalHitCount = 0;
1653 unsigned int totalHitProbes = 0;
1654 unsigned int maxHitProbes = 0;
1655 unsigned int totalMissCount = 0;
1656 unsigned int totalMissProbes = 0;
1657 unsigned int maxMissProbes = 0;
1658 unsigned int totalFlushCount = 0;
1659 unsigned int totalFlushedEntries = 0;
1660 unsigned int maxFlushedEntries = 0;
1661 #endif
1662
1663 totalChain = 0;
1664 maxChain = 0;
1665 totalMissChain = 0;
1666 maxMissChain = 0;
1667
1668 // Sum information over all caches
1669 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1670 {
1671 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1672 {
1673 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1674 totalEntries += totalEntriesBySize[isMeta][index];
1675 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1676 totalChain += totalChainBySize[isMeta][index];
1677 if (maxEntriesBySize[isMeta][index] > maxEntries)
1678 maxEntries = maxEntriesBySize[isMeta][index];
1679 if (maxChainBySize[isMeta][index] > maxChain)
1680 maxChain = maxChainBySize[isMeta][index];
1681 totalMissChain += totalMissChainBySize[isMeta][index];
1682 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1683 maxMissChain = maxMissChainBySize[isMeta][index];
1684 #ifdef OBJC_INSTRUMENTED
1685 totalHitCount += hitCountBySize[isMeta][index];
1686 totalHitProbes += hitProbesBySize[isMeta][index];
1687 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1688 maxHitProbes = maxHitProbesBySize[isMeta][index];
1689 totalMissCount += missCountBySize[isMeta][index];
1690 totalMissProbes += missProbesBySize[isMeta][index];
1691 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1692 maxMissProbes = maxMissProbesBySize[isMeta][index];
1693 totalFlushCount += flushCountBySize[isMeta][index];
1694 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1695 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1696 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1697 #endif
1698 }
1699
1700 totalCacheCount += cacheCountByType[isMeta];
1701 }
1702
1703 // Log our findings
1704 printf ("There are %u classes\n", classCount);
1705
1706 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1707 {
1708 // Number of this type of class
1709 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1710 cacheCountByType[isMeta],
1711 isMeta ? "class" : "instance");
1712
1713 // Print header
1714 PrintCacheHeader ();
1715
1716 // Keep format consistent even if there are no caches of this kind
1717 if (cacheCountByType[isMeta] == 0)
1718 {
1719 printf ("(none)\n");
1720 continue;
1721 }
1722
1723 // Usage information by cache size
1724 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1725 {
1726 unsigned int cacheCount;
1727 unsigned int cacheSlotCount;
1728 unsigned int cacheEntryCount;
1729
1730 // Get number of caches of this type and size
1731 cacheCount = cacheCountBySize[isMeta][index];
1732 if (cacheCount == 0)
1733 continue;
1734
1735 // Get the cache slot count and the total number of valid entries
1736 cacheSlotCount = (1 << index);
1737 cacheEntryCount = totalEntriesBySize[isMeta][index];
1738
1739 // Give the analysis
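// A note on the averages passed below: entries-per-cache divides by the
// number of caches of this size, average chain length divides by the valid
// entries only, and average miss-chain length divides by every slot
// (cacheCount * cacheSlotCount), since an empty slot contributes a
// zero-length miss chain.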
1740 PrintCacheInfo (cacheSlotCount,
1741 cacheCount,
1742 cacheEntryCount,
1743 (float) cacheEntryCount / (float) cacheCount,
1744 maxEntriesBySize[isMeta][index],
1745 cacheEntryCount ? (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount : 0.0,
1746 maxChainBySize[isMeta][index],
1747 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1748 maxMissChainBySize[isMeta][index]
1749 #ifdef OBJC_INSTRUMENTED
1750 , hitCountBySize[isMeta][index],
1751 hitCountBySize[isMeta][index] ?
1752 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1753 maxHitProbesBySize[isMeta][index],
1754 missCountBySize[isMeta][index],
1755 missCountBySize[isMeta][index] ?
1756 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1757 maxMissProbesBySize[isMeta][index],
1758 flushCountBySize[isMeta][index],
1759 flushCountBySize[isMeta][index] ?
1760 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1761 maxFlushedEntriesBySize[isMeta][index]
1762 #endif
1763 );
1764 }
1765 }
1766
1767 // Give overall numbers
1768 printf ("\nCumulative:\n");
1769 PrintCacheHeader ();
1770 PrintCacheInfo (totalSlots,
1771 totalCacheCount,
1772 totalEntries,
1773 (float) totalEntries / (float) totalCacheCount,
1774 maxEntries,
1775 (float) totalChain / (float) totalEntries,
1776 maxChain,
1777 (float) totalMissChain / (float) totalSlots,
1778 maxMissChain
1779 #ifdef OBJC_INSTRUMENTED
1780 , totalHitCount,
1781 totalHitCount ?
1782 (float) totalHitProbes / (float) totalHitCount : 0.0,
1783 maxHitProbes,
1784 totalMissCount,
1785 totalMissCount ?
1786 (float) totalMissProbes / (float) totalMissCount : 0.0,
1787 maxMissProbes,
1788 totalFlushCount,
1789 totalFlushCount ?
1790 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1791 maxFlushedEntries
1792 #endif
1793 );
1794
1795 printf ("\nNumber of \"forward::\" entries: %u\n", negativeEntryCount);
1796 printf ("Number of cache expansions: %u\n", cacheExpandCount);
1797 #ifdef OBJC_INSTRUMENTED
1798 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1799 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1800 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1801 LinearFlushCachesCount,
1802 LinearFlushCachesVisitedCount,
1803 LinearFlushCachesCount ?
1804 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1805 MaxLinearFlushCachesVisitedCount,
1806 LinearFlushCachesVisitedCount,
1807 1.0);
1808 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1809 NonlinearFlushCachesCount,
1810 NonlinearFlushCachesVisitedCount,
1811 NonlinearFlushCachesCount ?
1812 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1813 MaxNonlinearFlushCachesVisitedCount,
1814 NonlinearFlushCachesClassCount,
1815 NonlinearFlushCachesClassCount ?
1816 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1817 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1818 LinearFlushCachesCount + NonlinearFlushCachesCount,
1819 IdealFlushCachesCount,
1820 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1821 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1822 MaxIdealFlushCachesCount,
1823 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1824 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1825 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1826
1827 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1828 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1829 #endif
1830
1831 #if 0
1832 printf ("\nLookup chains:");
1833 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1834 {
1835 if (chainCount[index] != 0)
1836 printf (" %u:%u", index, chainCount[index]);
1837 }
1838
1839 printf ("\nMiss chains:");
1840 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1841 {
1842 if (missChainCount[index] != 0)
1843 printf (" %u:%u", index, missChainCount[index]);
1844 }
1845
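// Rough breakdown of the formula below: each cache is a single allocation
// holding the objc_cache header plus one bucket pointer per slot (the header
// already embeds one bucket pointer, hence the subtraction), and each
// "forward::" entry additionally carries its own malloc'd cache_entry,
// the only entries whose storage belongs to the cache rather than to a
// method list.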
1846 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1847 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1848 totalSlots * sizeof(cache_entry *) +
1849 negativeEntryCount * sizeof(cache_entry));
1850 #endif
1851 }
1852 }
1853
1854 #endif