1 /*
2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /***********************************************************************
25 * objc-cache.mm
26 * Method cache management
27 * Cache flushing
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
32
33
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
36 *
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
41 *
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
54 *
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
61 *
62 * Cache readers (PC-checked by collecting_in_critical())
63 * objc_msgSend*
64 * _cache_getImp
65 * _cache_getMethod
66 *
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
75 *
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
77 * _cache_print
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
81 *
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward_internal,
90 * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
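/* Minimal sketch of the write-side protocol described above (hedged
 * pseudocode, not runtime API; "free queued garbage" stands in for the
 * loop in _cache_collect further down in this file):
 *
 *     mutex_lock(&cacheUpdateLock);
 *     // ... detach the old cache memory from the class ...
 *     _cache_collect_free(old_cache, old_size);   // queue it, don't free it
 *     if (enough garbage queued && !_collecting_in_critical())
 *         // no objc_msgSend can still see the queued blocks: free them
 *     mutex_unlock(&cacheUpdateLock);
 */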
93
94 #include "objc-private.h"
95 #include "hashtable2.h"
96
97 typedef struct {
98 SEL name; // same layout as struct old_method
99 void *unused;
100 IMP imp; // same layout as struct old_method
101 } cache_entry;
102
103 #if __OBJC2__
104
105 #ifndef __LP64__
106 # define CACHE_HASH(sel, mask) (((uintptr_t)(sel)>>2) & (mask))
107 #else
108 # define CACHE_HASH(sel, mask) (((unsigned int)((uintptr_t)(sel)>>0)) & (mask))
109 #endif
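/* Illustrative example (assumes the __LP64__ branch and a made-up selector
 * address): CACHE_HASH only masks the low bits of the SEL pointer, and the
 * lookup code probes forward from that slot with wrap-around. */
#if 0
// e.g. mask == 3 (a 4-slot cache), sel == (SEL)0x10004f2a8:
// CACHE_HASH(sel, 3) evaluates to ((unsigned int)0x10004f2a8) & 3, i.e. 0.
static uintptr_t example_initial_slot(SEL sel, uintptr_t mask)
{
    return CACHE_HASH(sel, mask);   // starting probe index, in [0, mask]
}
#endif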
110
111 struct objc_cache {
112 uintptr_t mask; /* total = mask + 1 */
113 uintptr_t occupied;
114 cache_entry *buckets[1];
115 };
116
117 #define CACHE_BUCKET(e) ((cache_entry *)e)
118
119 #else
120
121 /* Most definitions are in runtime.h */
122 #define CACHE_BUCKET(e) ((Method)e)
123
124 #endif
125
126
127 /* When _class_slow_grow is non-zero, any given cache is actually grown
128 * only on the odd-numbered times it becomes full; on the even-numbered
129 * times, it is simply emptied and re-used. When this flag is zero,
130 * caches are grown every time. */
131 static const int _class_slow_grow = 1;
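/* Rough example of the effect: with _class_slow_grow set, a 4-slot cache
 * that hits the 3/4-full threshold in _cache_fill is first emptied and
 * reused in place; the next time it fills, _cache_expand doubles it to 8. */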
132
133 /* For min cache size: clear_cache=1, slow_grow=1
134 For max cache size: clear_cache=0, slow_grow=0 */
135
136 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
137 enum {
138 INIT_CACHE_SIZE_LOG2 = 2,
139 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
140 };
141
142
143 /* Amount of space required for `count` hash table buckets, knowing that
144 * one entry is embedded in the cache structure itself. */
145 #define TABLE_SIZE(count) ((count - 1) * sizeof(cache_entry *))
146
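/* Worked size example (assumes 64-bit pointers and the __OBJC2__
 * struct objc_cache above): a new cache has INIT_CACHE_SIZE == 4 slots,
 * so _cache_malloc below asks for
 *     sizeof(struct objc_cache) + TABLE_SIZE(4)
 *     == 24 + 3 * sizeof(cache_entry *)
 *     == 48 bytes,
 * since one bucket pointer is already embedded in struct objc_cache. */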
147
148 #if !TARGET_OS_WIN32
149 # define CACHE_ALLOCATOR
150 #endif
151
152 /* Custom cache allocator parameters.
153 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
154 #define CACHE_ALLOCATOR_MIN 512
155 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
156 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
157 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
158
159 static uintptr_t cache_allocator_mask_for_size(size_t size)
160 {
161 return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
162 }
163
164 static size_t cache_allocator_size_for_mask(uintptr_t mask)
165 {
166 size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
167 size_t actual = CACHE_QUANTUM;
168 while (actual < requested) actual += CACHE_QUANTUM;
169 return actual;
170 }
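/* Worked quantum example (assumes 32-bit pointers, where the custom
 * allocator is actually used -- see the 64-bit fixme in _cache_malloc):
 *     CACHE_QUANTUM == 512 + 12 - 4 == 520 bytes.
 * A 128-slot cache needs 12 + 127*4 == 520 bytes, exactly one quantum;
 * a 256-slot cache needs 12 + 255*4 == 1032 bytes, rounded up to 1040
 * (two quanta) by cache_allocator_size_for_mask. These figures match the
 * ideal and allocator sizes listed with the allocator implementation below. */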
171
172
173 /* Cache instrumentation data. Immediately follows the cache block itself. */
174 #ifdef OBJC_INSTRUMENTED
175 typedef struct
176 {
177 unsigned int hitCount; // cache lookup success tally
178 unsigned int hitProbes; // sum entries checked to hit
179 unsigned int maxHitProbes; // max entries checked to hit
180 unsigned int missCount; // cache lookup no-find tally
181 unsigned int missProbes; // sum entries checked to miss
182 unsigned int maxMissProbes; // max entries checked to miss
183 unsigned int flushCount; // cache flush tally
184 unsigned int flushedEntries; // sum cache entries flushed
185 unsigned int maxFlushedEntries; // max cache entries flushed
186 } CacheInstrumentation;
187
188 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
189 #endif
190
191 /* Cache filling and flushing instrumentation */
192
193 static int totalCacheFills = 0;
194
195 #ifdef OBJC_INSTRUMENTED
196 unsigned int LinearFlushCachesCount = 0;
197 unsigned int LinearFlushCachesVisitedCount = 0;
198 unsigned int MaxLinearFlushCachesVisitedCount = 0;
199 unsigned int NonlinearFlushCachesCount = 0;
200 unsigned int NonlinearFlushCachesClassCount = 0;
201 unsigned int NonlinearFlushCachesVisitedCount = 0;
202 unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
203 unsigned int IdealFlushCachesCount = 0;
204 unsigned int MaxIdealFlushCachesCount = 0;
205 #endif
206
207
208 /***********************************************************************
209 * A static empty cache. All classes initially point at this cache.
210 * When the first message is sent it misses in the cache, and when
211 * the cache is grown it checks for this case and uses malloc rather
212 * than realloc. This avoids the need to check for NULL caches in the
213 * messenger.
214 ***********************************************************************/
215
216 struct objc_cache _objc_empty_cache =
217 {
218 0, // mask
219 0, // occupied
220 { NULL } // buckets
221 };
222 #ifdef OBJC_INSTRUMENTED
223 CacheInstrumentation emptyCacheInstrumentation = {0};
224 #endif
225
226
227 /* Local prototypes */
228
229 static BOOL _cache_isEmpty(Cache cache);
230 static Cache _cache_malloc(uintptr_t slotCount);
231 static Cache _cache_create(Class cls);
232 static Cache _cache_expand(Class cls);
233 #if __OBJC2__
234 static void _cache_flush(Class cls);
235 #else
236 extern "C" void _cache_flush(Class cls);
237 #endif
238
239 static int _collecting_in_critical(void);
240 static void _garbage_make_room(void);
241 static void _cache_collect_free(void *data, size_t size);
242
243 #if defined(CACHE_ALLOCATOR)
244 static BOOL cache_allocator_is_block(void *block);
245 static Cache cache_allocator_calloc(size_t size);
246 static void cache_allocator_free(void *block);
247 #endif
248
249 /***********************************************************************
250 * Cache statistics for OBJC_PRINT_CACHE_SETUP
251 **********************************************************************/
252 static unsigned int cache_counts[16];
253 static size_t cache_allocations;
254 static size_t cache_collections;
255 static size_t cache_allocator_regions;
256
257 static size_t log2u(size_t x)
258 {
259 unsigned int log;
260
261 log = 0;
262 while (x >>= 1)
263 log += 1;
264
265 return log;
266 }
267
268
269 /***********************************************************************
270 * _cache_isEmpty.
271 * Returns YES if the given cache is some empty cache.
272 * Empty caches should never be allocated on the heap.
273 **********************************************************************/
274 static BOOL _cache_isEmpty(Cache cache)
275 {
276 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
277 }
278
279
280 /***********************************************************************
281 * _cache_malloc.
282 *
283 * Called from _cache_create() and _cache_expand()
284 * Cache locks: cacheUpdateLock must be held by the caller.
285 **********************************************************************/
286 static Cache _cache_malloc(uintptr_t slotCount)
287 {
288 Cache new_cache;
289 size_t size;
290
291 mutex_assert_locked(&cacheUpdateLock);
292
293 // Allocate table (why not check for failure?)
294 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
295 #if defined(OBJC_INSTRUMENTED)
296 // Custom cache allocator can't handle instrumentation.
297 size += sizeof(CacheInstrumentation);
298 new_cache = _calloc_internal(size, 1);
299 new_cache->mask = slotCount - 1;
300 #elif !defined(CACHE_ALLOCATOR)
301 // fixme cache allocator implementation isn't 64-bit clean
302 new_cache = _calloc_internal(size, 1);
303 new_cache->mask = (unsigned int)(slotCount - 1);
304 #else
305 if (size < CACHE_ALLOCATOR_MIN || UseInternalZone) {
306 new_cache = (Cache)_calloc_internal(size, 1);
307 new_cache->mask = slotCount - 1;
308 // occupied and buckets and instrumentation are all zero
309 } else {
310 new_cache = cache_allocator_calloc(size);
311 // mask is already set
312 // occupied and buckets and instrumentation are all zero
313 }
314 #endif
315
316 if (PrintCaches) {
317 size_t bucket = log2u(slotCount);
318 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
319 cache_counts[bucket]++;
320 }
321 cache_allocations++;
322 }
323
324 return new_cache;
325 }
326
327 /***********************************************************************
328 * _cache_free_block.
329 *
330 * Called from _cache_free() and _cache_collect_free().
331 * block may be a cache or a forward:: entry.
332 * If block is a cache, forward:: entries it points to will NOT be freed.
333 * Cache locks: cacheUpdateLock must be held by the caller.
334 **********************************************************************/
335 static void _cache_free_block(void *block)
336 {
337 mutex_assert_locked(&cacheUpdateLock);
338
339 #if !TARGET_OS_WIN32
340 if (PrintCaches) {
341 Cache cache = (Cache)block;
342 size_t slotCount = cache->mask + 1;
343 if (isPowerOf2(slotCount)) {
344 size_t bucket = log2u(slotCount);
345 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
346 cache_counts[bucket]--;
347 }
348 }
349 }
350 #endif
351
352 #if defined(CACHE_ALLOCATOR)
353 if (cache_allocator_is_block(block)) {
354 cache_allocator_free(block);
355 } else
356 #endif
357 {
358 free(block);
359 }
360 }
361
362
363 /***********************************************************************
364 * _cache_free.
365 *
366 * Called from _objc_remove_classes_in_image().
367 * forward:: entries in the cache ARE freed.
368 * Cache locks: cacheUpdateLock must NOT be held by the caller.
369 **********************************************************************/
370 void _cache_free(Cache cache)
371 {
372 unsigned int i;
373
374 mutex_lock(&cacheUpdateLock);
375
376 for (i = 0; i < cache->mask + 1; i++) {
377 cache_entry *entry = (cache_entry *)cache->buckets[i];
378 if (entry && entry->imp == _objc_msgForward_internal) {
379 _cache_free_block(entry);
380 }
381 }
382
383 _cache_free_block(cache);
384
385 mutex_unlock(&cacheUpdateLock);
386 }
387
388
389 /***********************************************************************
390 * _cache_create.
391 *
392 * Called from _cache_expand().
393 * Cache locks: cacheUpdateLock must be held by the caller.
394 **********************************************************************/
395 static Cache _cache_create(Class cls)
396 {
397 Cache new_cache;
398
399 mutex_assert_locked(&cacheUpdateLock);
400
401 // Allocate new cache block
402 new_cache = _cache_malloc(INIT_CACHE_SIZE);
403
404 // Install the cache
405 _class_setCache(cls, new_cache);
406
407 // Clear the grow flag so that we will re-use the current storage,
408 // rather than actually grow the cache, when expanding the cache
409 // for the first time
410 if (_class_slow_grow) {
411 _class_setGrowCache(cls, NO);
412 }
413
414 // Return our creation
415 return new_cache;
416 }
417
418
419 /***********************************************************************
420 * _cache_expand.
421 *
422 * Called from _cache_fill ()
423 * Cache locks: cacheUpdateLock must be held by the caller.
424 **********************************************************************/
425 static Cache _cache_expand(Class cls)
426 {
427 Cache old_cache;
428 Cache new_cache;
429 uintptr_t slotCount;
430 uintptr_t index;
431
432 mutex_assert_locked(&cacheUpdateLock);
433
434 // First growth goes from empty cache to a real one
435 old_cache = _class_getCache(cls);
436 if (_cache_isEmpty(old_cache))
437 return _cache_create (cls);
438
439 if (_class_slow_grow) {
440 // Cache grows every other time only.
441 if (_class_shouldGrowCache(cls)) {
442 // Grow the cache this time. Don't grow next time.
443 _class_setGrowCache(cls, NO);
444 }
445 else {
446 // Reuse the current cache storage this time. Do grow next time.
447 _class_setGrowCache(cls, YES);
448
449 // Clear the valid-entry counter
450 old_cache->occupied = 0;
451
452 // Invalidate all the cache entries
453 for (index = 0; index < old_cache->mask + 1; index += 1)
454 {
455 // Remember what this entry was, so we can possibly
456 // deallocate it after the bucket has been invalidated
457 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
458
459 // Skip invalid entry
460 if (!oldEntry)
461 continue;
462
463 // Invalidate this entry
464 old_cache->buckets[index] = NULL;
465
466 // Deallocate "forward::" entry
467 if (oldEntry->imp == _objc_msgForward_internal) {
468 _cache_collect_free (oldEntry, sizeof(cache_entry));
469 }
470 }
471
472 // Return the same old cache, freshly emptied
473 return old_cache;
474 }
475 }
476
477 // Double the cache size
478 slotCount = (old_cache->mask + 1) << 1;
479
480 new_cache = _cache_malloc(slotCount);
481
482 #ifdef OBJC_INSTRUMENTED
483 // Propagate the instrumentation data
484 {
485 CacheInstrumentation *oldCacheData;
486 CacheInstrumentation *newCacheData;
487
488 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
489 newCacheData = CACHE_INSTRUMENTATION(new_cache);
490 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
491 }
492 #endif
493
494 // Deallocate "forward::" entries from the old cache
495 for (index = 0; index < old_cache->mask + 1; index++) {
496 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
497 if (entry && entry->imp == _objc_msgForward_internal) {
498 _cache_collect_free (entry, sizeof(cache_entry));
499 }
500 }
501
502 // Install new cache
503 _class_setCache(cls, new_cache);
504
505 // Deallocate old cache, try freeing all the garbage
506 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *));
507 _cache_collect(false);
508
509 return new_cache;
510 }
511
512
513 /***********************************************************************
514 * _cache_fill. Add the specified method to the specified class' cache.
515 * Returns NO if the cache entry wasn't added: cache was busy,
516 * class is still being initialized, new entry is a duplicate.
517 *
518 * Called only from _class_lookupMethodAndLoadCache and
519 * class_respondsToMethod and _cache_addForwardEntry.
520 *
521 * Cache locks: cacheUpdateLock must not be held.
522 **********************************************************************/
523 BOOL _cache_fill(Class cls, Method smt, SEL sel)
524 {
525 uintptr_t newOccupied;
526 uintptr_t index;
527 cache_entry **buckets;
528 cache_entry *entry;
529 Cache cache;
530
531 mutex_assert_unlocked(&cacheUpdateLock);
532
533 // Never cache before +initialize is done
534 if (!_class_isInitialized(cls)) {
535 return NO;
536 }
537
538 // Keep tally of cache additions
539 totalCacheFills += 1;
540
541 mutex_lock(&cacheUpdateLock);
542
543 entry = (cache_entry *)smt;
544
545 cache = _class_getCache(cls);
546
547 // Make sure the entry wasn't added to the cache by some other thread
548 // before we grabbed the cacheUpdateLock.
549 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
550 // return forward:: entries.
551 if (_cache_getImp(cls, sel)) {
552 mutex_unlock(&cacheUpdateLock);
553 return NO; // entry is already cached, didn't add new one
554 }
555
556 // Use the cache as-is if it is less than 3/4 full
557 newOccupied = cache->occupied + 1;
558 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
559 // Cache is less than 3/4 full.
560 cache->occupied = (unsigned int)newOccupied;
561 } else {
562 // Cache is too full. Expand it.
563 cache = _cache_expand (cls);
564
565 // Account for the addition
566 cache->occupied += 1;
567 }
568
569 // Scan for the first unused slot and insert there.
570 // There is guaranteed to be an empty slot because the
571 // minimum size is 4 and we resized at 3/4 full.
572 buckets = (cache_entry **)cache->buckets;
573 for (index = CACHE_HASH(sel, cache->mask);
574 buckets[index] != NULL;
575 index = (index+1) & cache->mask)
576 {
577 // empty
578 }
579 buckets[index] = entry;
580
581 mutex_unlock(&cacheUpdateLock);
582
583 return YES; // successfully added new cache entry
584 }
585
586
587 /***********************************************************************
588 * _cache_addForwardEntry
589 * Add a forward:: entry for the given selector to cls's method cache.
590 * Does nothing if the cache addition fails for any reason.
591 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
592 * Cache locks: cacheUpdateLock must not be held.
593 **********************************************************************/
594 void _cache_addForwardEntry(Class cls, SEL sel)
595 {
596 cache_entry *smt;
597
598 smt = (cache_entry *)_malloc_internal(sizeof(cache_entry));
599 smt->name = sel;
600 smt->imp = _objc_msgForward_internal;
601 if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
602 // Entry not added to cache. Don't leak the method struct.
603 _free_internal(smt);
604 }
605 }
606
607
608 /***********************************************************************
609 * _cache_addIgnoredEntry
610 * Add an entry for the ignored selector to cls's method cache.
611 * Does nothing if the cache addition fails for any reason.
612 * Returns the ignored IMP.
613 * Cache locks: cacheUpdateLock must not be held.
614 **********************************************************************/
615 #if SUPPORT_GC && !SUPPORT_IGNORED_SELECTOR_CONSTANT
616 static cache_entry *alloc_ignored_entries(void)
617 {
618 cache_entry *e = (cache_entry *)_malloc_internal(5 * sizeof(cache_entry));
619 e[0] = (cache_entry){ @selector(retain), 0,(IMP)&_objc_ignored_method};
620 e[1] = (cache_entry){ @selector(release), 0,(IMP)&_objc_ignored_method};
621 e[2] = (cache_entry){ @selector(autorelease),0,(IMP)&_objc_ignored_method};
622 e[3] = (cache_entry){ @selector(retainCount),0,(IMP)&_objc_ignored_method};
623 e[4] = (cache_entry){ @selector(dealloc), 0,(IMP)&_objc_ignored_method};
624 return e;
625 }
626 #endif
627
628 IMP _cache_addIgnoredEntry(Class cls, SEL sel)
629 {
630 cache_entry *entryp = NULL;
631
632 #if !SUPPORT_GC
633 _objc_fatal("selector ignored with GC off");
634 #elif SUPPORT_IGNORED_SELECTOR_CONSTANT
635 static cache_entry entry = { (SEL)kIgnore, 0, (IMP)&_objc_ignored_method };
636 entryp = &entry;
637 assert(sel == (SEL)kIgnore);
638 #else
639 // hack
640 int i;
641 static cache_entry *entries;
642 INIT_ONCE_PTR(entries, alloc_ignored_entries(), free(v));
643
644 assert(ignoreSelector(sel));
645 for (i = 0; i < 5; i++) {
646 if (sel == entries[i].name) {
647 entryp = &entries[i];
648 break;
649 }
650 }
651 if (!entryp) _objc_fatal("selector %s (%p) is not ignored",
652 sel_getName(sel), sel);
653 #endif
654
655 _cache_fill(cls, (Method)entryp, sel);
656 return entryp->imp;
657 }
658
659
660 /***********************************************************************
661 * _cache_flush. Invalidate all valid entries in the given class' cache.
662 *
663 * Called from flush_caches() and _cache_fill()
664 * Cache locks: cacheUpdateLock must be held by the caller.
665 **********************************************************************/
666 #if __OBJC2__
667 static
668 #endif
669 void _cache_flush(Class cls)
670 {
671 Cache cache;
672 unsigned int index;
673
674 mutex_assert_locked(&cacheUpdateLock);
675
676 // Locate cache. Ignore unused cache.
677 cache = _class_getCache(cls);
678 if (_cache_isEmpty(cache)) return;
679
680 #ifdef OBJC_INSTRUMENTED
681 {
682 CacheInstrumentation *cacheData;
683
684 // Tally this flush
685 cacheData = CACHE_INSTRUMENTATION(cache);
686 cacheData->flushCount += 1;
687 cacheData->flushedEntries += cache->occupied;
688 if (cache->occupied > cacheData->maxFlushedEntries)
689 cacheData->maxFlushedEntries = cache->occupied;
690 }
691 #endif
692
693 // Traverse the cache
694 for (index = 0; index <= cache->mask; index += 1)
695 {
696 // Remember what this entry was, so we can possibly
697 // deallocate it after the bucket has been invalidated
698 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
699
700 // Invalidate this entry
701 cache->buckets[index] = NULL;
702
703 // Deallocate "forward::" entry
704 if (oldEntry && oldEntry->imp == _objc_msgForward_internal)
705 _cache_collect_free (oldEntry, sizeof(cache_entry));
706 }
707
708 // Clear the valid-entry counter
709 cache->occupied = 0;
710 }
711
712
713 /***********************************************************************
714 * flush_cache. Flushes the instance method cache for class cls only.
715 * Use flush_caches() if cls might have in-use subclasses.
716 **********************************************************************/
717 void flush_cache(Class cls)
718 {
719 if (cls) {
720 mutex_lock(&cacheUpdateLock);
721 _cache_flush(cls);
722 mutex_unlock(&cacheUpdateLock);
723 }
724 }
725
726
727 /***********************************************************************
728 * cache collection.
729 **********************************************************************/
730
731 #if !TARGET_OS_WIN32
732
733 // A sentinel (magic value) to report bad thread_get_state status.
734 // Must not be a valid PC.
735 // Must not be zero - thread_get_state() on a new thread returns PC == 0.
736 #define PC_SENTINEL 1
737
738 // UNIX03 compliance hack (4508809)
739 #if !__DARWIN_UNIX03
740 #define __srr0 srr0
741 #define __eip eip
742 #endif
743
744 static uintptr_t _get_pc_for_thread(thread_t thread)
745 #if defined(__i386__)
746 {
747 i386_thread_state_t state;
748 unsigned int count = i386_THREAD_STATE_COUNT;
749 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
750 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
751 }
752 #elif defined(__x86_64__)
753 {
754 x86_thread_state64_t state;
755 unsigned int count = x86_THREAD_STATE64_COUNT;
756 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
757 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
758 }
759 #elif defined(__arm__)
760 {
761 arm_thread_state_t state;
762 unsigned int count = ARM_THREAD_STATE_COUNT;
763 kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
764 return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
765 }
766 #else
767 {
768 #error _get_pc_for_thread () not implemented for this architecture
769 }
770 #endif
771
772 #endif
773
774 /***********************************************************************
775 * _collecting_in_critical.
776 * Returns TRUE if some thread is currently executing a cache-reading
777 * function. Collection of cache garbage is not allowed when a cache-
778 * reading function is in progress because it might still be using
779 * the garbage memory.
780 **********************************************************************/
781 OBJC_EXPORT uintptr_t objc_entryPoints[];
782 OBJC_EXPORT uintptr_t objc_exitPoints[];
783
784 static int _collecting_in_critical(void)
785 {
786 #if TARGET_OS_WIN32
787 return TRUE;
788 #else
789 thread_act_port_array_t threads;
790 unsigned number;
791 unsigned count;
792 kern_return_t ret;
793 int result;
794
795 mach_port_t mythread = pthread_mach_thread_np(pthread_self());
796
797 // Get a list of all the threads in the current task
798 ret = task_threads (mach_task_self (), &threads, &number);
799 if (ret != KERN_SUCCESS)
800 {
801 _objc_fatal("task_threads failed (result %d)\n", ret);
802 }
803
804 // Check whether any thread is in the cache lookup code
805 result = FALSE;
806 for (count = 0; count < number; count++)
807 {
808 int region;
809 uintptr_t pc;
810
811 // Don't bother checking ourselves
812 if (threads[count] == mythread)
813 continue;
814
815 // Find out where thread is executing
816 pc = _get_pc_for_thread (threads[count]);
817
818 // Check for bad status, and if so, assume the worst (can't collect)
819 if (pc == PC_SENTINEL)
820 {
821 result = TRUE;
822 goto done;
823 }
824
825 // Check whether it is in the cache lookup code
826 for (region = 0; objc_entryPoints[region] != 0; region++)
827 {
828 if ((pc >= objc_entryPoints[region]) &&
829 (pc <= objc_exitPoints[region]))
830 {
831 result = TRUE;
832 goto done;
833 }
834 }
835 }
836
837 done:
838 // Deallocate the port rights for the threads
839 for (count = 0; count < number; count++) {
840 mach_port_deallocate(mach_task_self (), threads[count]);
841 }
842
843 // Deallocate the thread list
844 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
845
846 // Return our finding
847 return result;
848 #endif
849 }
850
851
852 /***********************************************************************
853 * _garbage_make_room. Ensure that there is enough room for at least
854 * one more ref in the garbage.
855 **********************************************************************/
856
857 // amount of memory represented by all refs in the garbage
858 static size_t garbage_byte_size = 0;
859
860 // do not empty the garbage until garbage_byte_size gets at least this big
861 static size_t garbage_threshold = 32*1024;
862
863 // table of refs to free
864 static void **garbage_refs = 0;
865
866 // current number of refs in garbage_refs
867 static size_t garbage_count = 0;
868
869 // capacity of current garbage_refs
870 static size_t garbage_max = 0;
871
872 // capacity of initial garbage_refs
873 enum {
874 INIT_GARBAGE_COUNT = 128
875 };
876
877 static void _garbage_make_room(void)
878 {
879 static int first = 1;
880
881 // Create the collection table the first time it is needed
882 if (first)
883 {
884 first = 0;
885 garbage_refs = (void**)
886 _malloc_internal(INIT_GARBAGE_COUNT * sizeof(void *));
887 garbage_max = INIT_GARBAGE_COUNT;
888 }
889
890 // Double the table if it is full
891 else if (garbage_count == garbage_max)
892 {
893 garbage_refs = (void**)
894 _realloc_internal(garbage_refs, garbage_max * 2 * sizeof(void *));
895 garbage_max *= 2;
896 }
897 }
898
899
900 /***********************************************************************
901 * _cache_collect_free. Add the specified malloc'd memory to the list
902 * of garbage to free at some later point.
903 * size is used for the collection threshold. It does not have to be
904 * precisely the block's size.
905 * Cache locks: cacheUpdateLock must be held by the caller.
906 **********************************************************************/
907 static void _cache_collect_free(void *data, size_t size)
908 {
909 mutex_assert_locked(&cacheUpdateLock);
910
911 _garbage_make_room ();
912 garbage_byte_size += size;
913 garbage_refs[garbage_count++] = data;
914 }
915
916
917 /***********************************************************************
918 * _cache_collect. Try to free accumulated dead caches.
919 * collectALot tries harder to free memory.
920 * Cache locks: cacheUpdateLock must be held by the caller.
921 **********************************************************************/
922 void _cache_collect(bool collectALot)
923 {
924 mutex_assert_locked(&cacheUpdateLock);
925
926 // Done if the garbage is not full
927 if (garbage_byte_size < garbage_threshold && !collectALot) {
928 return;
929 }
930
931 // Synchronize collection with objc_msgSend and other cache readers
932 if (!collectALot) {
933 if (_collecting_in_critical ()) {
934 // objc_msgSend (or other cache reader) is currently looking in
935 // the cache and might still be using some garbage.
936 if (PrintCaches) {
937 _objc_inform ("CACHES: not collecting; "
938 "objc_msgSend in progress");
939 }
940 return;
941 }
942 }
943 else {
944 // No excuses.
945 while (_collecting_in_critical())
946 ;
947 }
948
949 // No cache readers in progress - garbage is now deletable
950
951 // Log our progress
952 if (PrintCaches) {
953 cache_collections++;
954 _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
955 }
956
957 // Dispose all refs now in the garbage
958 while (garbage_count--) {
959 _cache_free_block(garbage_refs[garbage_count]);
960 }
961
962 // Clear the garbage count and total size indicator
963 garbage_count = 0;
964 garbage_byte_size = 0;
965
966 if (PrintCaches) {
967 size_t i;
968 size_t total = 0;
969 size_t ideal_total = 0;
970 size_t malloc_total = 0;
971 size_t local_total = 0;
972
973 for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
974 int count = cache_counts[i];
975 int slots = 1 << i;
976 size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
977 size_t ideal = size;
978 #if TARGET_OS_WIN32
979 size_t malloc = size;
980 #else
981 size_t malloc = malloc_good_size(size);
982 #endif
983 size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
984
985 if (!count) continue;
986
987 _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
988
989 total += count;
990 ideal_total += ideal*count;
991 malloc_total += malloc*count;
992 local_total += local*count;
993 }
994
995 _objc_inform("CACHES: total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
996 }
997 }
998
999
1000
1001
1002
1003 #if defined(CACHE_ALLOCATOR)
1004
1005 /***********************************************************************
1006 * Custom method cache allocator.
1007 * Method cache block sizes are 2^slots+2 words, which is a pessimal
1008 * case for the system allocator. It wastes 504 bytes per cache block
1009 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
1010 * To save memory, the custom cache allocator below is used.
1011 *
1012 * The cache allocator uses 128 KB allocation regions. Few processes will
1013 * require a second region. Within a region, allocation is address-ordered
1014 * first fit.
1015 *
1016 * The cache allocator uses a quantum of 520.
1017 * Cache block ideal sizes: 520, 1032, 2056, 4104
1018 * Cache allocator sizes: 520, 1040, 2080, 4160
1019 *
1020 * Because all blocks are known to be genuine method caches, the ordinary
1021 * cache->mask and cache->occupied fields are used as block headers.
1022 * No out-of-band headers are maintained. The number of blocks will
1023 * almost always be fewer than 200, so for simplicity there is no free
1024 * list or other optimization.
1025 *
1026 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
1027 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
1028 *
1029 * No cache allocator functions take any locks. Instead, the caller
1030 * must hold the cacheUpdateLock.
1031 *
1032 * fixme with 128 KB regions and 520 B min block size, an allocation
1033 * bitmap would be only 32 bytes - better than free list?
1034 **********************************************************************/
1035
1036 typedef struct cache_allocator_block {
1037 uintptr_t size;
1038 uintptr_t state;
1039 struct cache_allocator_block *nextFree;
1040 } cache_allocator_block;
1041
1042 typedef struct cache_allocator_region {
1043 cache_allocator_block *start;
1044 cache_allocator_block *end; // first non-block address
1045 cache_allocator_block *freeList;
1046 struct cache_allocator_region *next;
1047 } cache_allocator_region;
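/* Sketch of the header aliasing described above (illustrative only): an
 * in-use block is just a struct objc_cache, so block->size aliases
 * cache->mask and block->state aliases cache->occupied. A free block
 * reuses the same two words for its exact byte size and the value -1. */
#if 0
static int example_block_is_free(cache_allocator_block *b)
{
    return b->state == (uintptr_t)-1;   // free blocks carry -1 in `state`
}
#endif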
1048
1049 static cache_allocator_region *cacheRegion = NULL;
1050
1051
1052 /***********************************************************************
1053 * cache_allocator_add_region
1054 * Allocates and returns a new region that can hold at least size
1055 * bytes of large method caches.
1056 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
1057 * with a minimum of CACHE_REGION_SIZE.
1058 * The new region is lowest-priority for new allocations. Callers that
1059 * know the other regions are already full should allocate directly
1060 * into the returned region.
1061 **********************************************************************/
1062 static cache_allocator_region *cache_allocator_add_region(size_t size)
1063 {
1064 vm_address_t addr;
1065 cache_allocator_block *b;
1066 cache_allocator_region **rgnP;
1067 cache_allocator_region *newRegion = (cache_allocator_region *)
1068 _calloc_internal(1, sizeof(cache_allocator_region));
1069
1070 // Round size up to quantum boundary, and apply the minimum size.
1071 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
1072 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
1073
1074 // Allocate the region
1075 addr = (vm_address_t)calloc(size, 1);
1076 newRegion->start = (cache_allocator_block *)addr;
1077 newRegion->end = (cache_allocator_block *)(addr + size);
1078
1079 // Mark the first block: free and covers the entire region
1080 b = newRegion->start;
1081 b->size = size;
1082 b->state = (uintptr_t)-1;
1083 b->nextFree = NULL;
1084 newRegion->freeList = b;
1085
1086 // Add to end of the linked list of regions.
1087 // Other regions should be re-used before this one is touched.
1088 newRegion->next = NULL;
1089 rgnP = &cacheRegion;
1090 while (*rgnP) {
1091 rgnP = &(**rgnP).next;
1092 }
1093 *rgnP = newRegion;
1094
1095 cache_allocator_regions++;
1096
1097 return newRegion;
1098 }
1099
1100
1101 /***********************************************************************
1102 * cache_allocator_coalesce
1103 * Attempts to coalesce a free block with the single free block following
1104 * it in the free list, if any.
1105 **********************************************************************/
1106 static void cache_allocator_coalesce(cache_allocator_block *block)
1107 {
1108 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1109 block->size += block->nextFree->size;
1110 block->nextFree = block->nextFree->nextFree;
1111 }
1112 }
1113
1114
1115 /***********************************************************************
1116 * cache_region_calloc
1117 * Attempt to allocate a size-byte block in the given region.
1118 * Allocation is first-fit. The free list is already fully coalesced.
1119 * Returns NULL if there is not enough room in the region for the block.
1120 **********************************************************************/
1121 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1122 {
1123 cache_allocator_block **blockP;
1124 uintptr_t mask;
1125
1126 // Save mask for allocated block, then round size
1127 // up to CACHE_QUANTUM boundary
1128 mask = cache_allocator_mask_for_size(size);
1129 size = cache_allocator_size_for_mask(mask);
1130
1131 // Search the free list for a sufficiently large free block.
1132
1133 for (blockP = &rgn->freeList;
1134 *blockP != NULL;
1135 blockP = &(**blockP).nextFree)
1136 {
1137 cache_allocator_block *block = *blockP;
1138 if (block->size < size) continue; // not big enough
1139
1140 // block is now big enough. Allocate from it.
1141
1142 // Slice off unneeded fragment of block, if any,
1143 // and reconnect the free list around block.
1144 if (block->size - size >= CACHE_QUANTUM) {
1145 cache_allocator_block *leftover =
1146 (cache_allocator_block *)(size + (uintptr_t)block);
1147 leftover->size = block->size - size;
1148 leftover->state = (uintptr_t)-1;
1149 leftover->nextFree = block->nextFree;
1150 *blockP = leftover;
1151 } else {
1152 *blockP = block->nextFree;
1153 }
1154
1155 // block is now exactly the right size.
1156
1157 bzero(block, size);
1158 block->size = mask; // Cache->mask
1159 block->state = 0; // Cache->occupied
1160
1161 return block;
1162 }
1163
1164 // No room in this region.
1165 return NULL;
1166 }
1167
1168
1169 /***********************************************************************
1170 * cache_allocator_calloc
1171 * Custom allocator for large method caches (128+ slots)
1172 * The returned cache block already has cache->mask set.
1173 * cache->occupied and the cache contents are zero.
1174 * Cache locks: cacheUpdateLock must be held by the caller
1175 **********************************************************************/
1176 static Cache cache_allocator_calloc(size_t size)
1177 {
1178 cache_allocator_region *rgn;
1179
1180 mutex_assert_locked(&cacheUpdateLock);
1181
1182 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1183 void *p = cache_region_calloc(rgn, size);
1184 if (p) {
1185 return (Cache)p;
1186 }
1187 }
1188
1189 // No regions or all regions full - make a region and try one more time
1190 // In the unlikely case of a cache over 256KB, it will get its own region.
1191 return (Cache)cache_region_calloc(cache_allocator_add_region(size), size);
1192 }
1193
1194
1195 /***********************************************************************
1196 * cache_allocator_region_for_block
1197 * Returns the cache allocator region that ptr points into, or NULL.
1198 **********************************************************************/
1199 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1200 {
1201 cache_allocator_region *rgn;
1202 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1203 if (block >= rgn->start && block < rgn->end) return rgn;
1204 }
1205 return NULL;
1206 }
1207
1208
1209 /***********************************************************************
1210 * cache_allocator_is_block
1211 * If ptr is a live block from the cache allocator, return YES
1212 * If ptr is a block from some other allocator, return NO.
1213 * If ptr is a dead block from the cache allocator, result is undefined.
1214 * Cache locks: cacheUpdateLock must be held by the caller
1215 **********************************************************************/
1216 static BOOL cache_allocator_is_block(void *ptr)
1217 {
1218 mutex_assert_locked(&cacheUpdateLock);
1219 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1220 }
1221
1222 /***********************************************************************
1223 * cache_allocator_free
1224 * Frees a block allocated by the cache allocator.
1225 * Cache locks: cacheUpdateLock must be held by the caller.
1226 **********************************************************************/
1227 static void cache_allocator_free(void *ptr)
1228 {
1229 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1230 cache_allocator_block *cur;
1231 cache_allocator_region *rgn;
1232
1233 mutex_assert_locked(&cacheUpdateLock);
1234
1235 if (! (rgn = cache_allocator_region_for_block(dead))) {
1236 // free of non-pointer
1237 _objc_inform("cache_allocator_free of non-pointer %p", dead);
1238 return;
1239 }
1240
1241 dead->size = cache_allocator_size_for_mask(dead->size);
1242 dead->state = (uintptr_t)-1;
1243
1244 if (!rgn->freeList || rgn->freeList > dead) {
1245 // dead block belongs at front of free list
1246 dead->nextFree = rgn->freeList;
1247 rgn->freeList = dead;
1248 cache_allocator_coalesce(dead);
1249 return;
1250 }
1251
1252 // dead block belongs in the middle or end of free list
1253 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1254 cache_allocator_block *ahead = cur->nextFree;
1255
1256 if (!ahead || ahead > dead) {
1257 // cur and ahead straddle dead, OR dead belongs at end of free list
1258 cur->nextFree = dead;
1259 dead->nextFree = ahead;
1260
1261 // coalesce into dead first in case both succeed
1262 cache_allocator_coalesce(dead);
1263 cache_allocator_coalesce(cur);
1264 return;
1265 }
1266 }
1267
1268 // uh-oh
1269 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1270 }
1271
1272 // defined(CACHE_ALLOCATOR)
1273 #endif
1274
1275 /***********************************************************************
1276 * Cache instrumentation and debugging
1277 **********************************************************************/
1278
1279 #ifdef OBJC_INSTRUMENTED
1280 enum {
1281 CACHE_HISTOGRAM_SIZE = 512
1282 };
1283
1284 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1285 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1286 #endif
1287
1288
1289 /***********************************************************************
1290 * _cache_print.
1291 **********************************************************************/
1292 static void _cache_print(Cache cache)
1293 {
1294 uintptr_t index;
1295 uintptr_t count;
1296
1297 count = cache->mask + 1;
1298 for (index = 0; index < count; index += 1) {
1299 cache_entry *entry = (cache_entry *)cache->buckets[index];
1300 if (entry) {
1301 if (entry->imp == _objc_msgForward_internal)
1302 printf ("does not recognize: \n");
1303 printf ("%s\n", sel_getName(entry->name));
1304 }
1305 }
1306 }
1307
1308
1309 /***********************************************************************
1310 * _class_printMethodCaches.
1311 **********************************************************************/
1312 void _class_printMethodCaches(Class cls)
1313 {
1314 if (_cache_isEmpty(_class_getCache(cls))) {
1315 printf("no instance-method cache for class %s\n", _class_getName(cls));
1316 } else {
1317 printf("instance-method cache for class %s:\n", _class_getName(cls));
1318 _cache_print(_class_getCache(cls));
1319 }
1320
1321 if (_cache_isEmpty(_class_getCache(((id)cls)->isa))) {
1322 printf("no class-method cache for class %s\n", _class_getName(cls));
1323 } else {
1324 printf ("class-method cache for class %s:\n", _class_getName(cls));
1325 _cache_print(_class_getCache(((id)cls)->isa));
1326 }
1327 }
1328
1329
1330 #if 0
1331 #warning fixme
1332
1333
1334 /***********************************************************************
1335 * _class_printDuplicateCacheEntries.
1336 **********************************************************************/
1337 void _class_printDuplicateCacheEntries(BOOL detail)
1338 {
1339 NXHashState state;
1340 Class cls;
1341 unsigned int duplicates;
1342 unsigned int index1;
1343 unsigned int index2;
1344 unsigned int mask;
1345 unsigned int count;
1346 unsigned int isMeta;
1347 Cache cache;
1348
1349
1350 printf ("Checking for duplicate cache entries \n");
1351
1352 // Outermost loop - iterate over all classes
1353 state = NXInitHashState (class_hash);
1354 duplicates = 0;
1355 while (NXNextHashState (class_hash, &state, (void **) &cls))
1356 {
1357 // Control loop - do given class' cache, then its isa's cache
1358 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1359 {
1360 // Select cache of interest and make sure it exists
1361 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1362 if (_cache_isEmpty(cache))
1363 continue;
1364
1365 // Middle loop - check each entry in the given cache
1366 mask = cache->mask;
1367 count = mask + 1;
1368 for (index1 = 0; index1 < count; index1 += 1)
1369 {
1370 // Skip invalid entry
1371 if (!cache->buckets[index1])
1372 continue;
1373
1374 // Inner loop - check that given entry matches no later entry
1375 for (index2 = index1 + 1; index2 < count; index2 += 1)
1376 {
1377 // Skip invalid entry
1378 if (!cache->buckets[index2])
1379 continue;
1380
1381 // Check for duplication by method name comparison
1382 if (strcmp ((char *) cache->buckets[index1]->name,
1383 (char *) cache->buckets[index2]->name) == 0)
1384 {
1385 if (detail)
1386 printf ("%s %s\n", _class_getName(cls), sel_getName(cache->buckets[index1]->name));
1387 duplicates += 1;
1388 break;
1389 }
1390 }
1391 }
1392 }
1393 }
1394
1395 // Log the findings
1396 printf ("duplicates = %d\n", duplicates);
1397 printf ("total cache fills = %d\n", totalCacheFills);
1398 }
1399
1400
1401 /***********************************************************************
1402 * PrintCacheHeader.
1403 **********************************************************************/
1404 static void PrintCacheHeader(void)
1405 {
1406 #ifdef OBJC_INSTRUMENTED
1407 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1408 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1409 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1410 #else
1411 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1412 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1413 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1414 #endif
1415 }
1416
1417
1418 /***********************************************************************
1419 * PrintCacheInfo.
1420 **********************************************************************/
1421 static void PrintCacheInfo(unsigned int cacheSize,
1422 unsigned int cacheCount,
1423 unsigned int slotsUsed,
1424 float avgUsed, unsigned int maxUsed,
1425 float avgSHit, unsigned int maxSHit,
1426 float avgSMiss, unsigned int maxSMiss
1427 #ifdef OBJC_INSTRUMENTED
1428 , unsigned int totDHits,
1429 float avgDHit,
1430 unsigned int maxDHit,
1431 unsigned int totDMisses,
1432 float avgDMiss,
1433 unsigned int maxDMiss,
1434 unsigned int totDFlsh,
1435 float avgDFlsh,
1436 unsigned int maxDFlsh
1437 #endif
1438 )
1439 {
1440 #ifdef OBJC_INSTRUMENTED
1441 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1442 #else
1443 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1444 #endif
1445 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1446 #ifdef OBJC_INSTRUMENTED
1447 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1448 #endif
1449 );
1450
1451 }
1452
1453
1454 #ifdef OBJC_INSTRUMENTED
1455 /***********************************************************************
1456 * PrintCacheHistogram. Show the non-zero entries from the specified
1457 * cache histogram.
1458 **********************************************************************/
1459 static void PrintCacheHistogram(char *title,
1460 unsigned int *firstEntry,
1461 unsigned int entryCount)
1462 {
1463 unsigned int index;
1464 unsigned int *thisEntry;
1465
1466 printf ("%s\n", title);
1467 printf (" Probes Tally\n");
1468 printf (" ------ -----\n");
1469 for (index = 0, thisEntry = firstEntry;
1470 index < entryCount;
1471 index += 1, thisEntry += 1)
1472 {
1473 if (*thisEntry == 0)
1474 continue;
1475
1476 printf (" %6d %5d\n", index, *thisEntry);
1477 }
1478 }
1479 #endif
1480
1481
1482 /***********************************************************************
1483 * _class_printMethodCacheStatistics.
1484 **********************************************************************/
1485
1486 #define MAX_LOG2_SIZE 32
1487 #define MAX_CHAIN_SIZE 100
1488
1489 void _class_printMethodCacheStatistics(void)
1490 {
1491 unsigned int isMeta;
1492 unsigned int index;
1493 NXHashState state;
1494 Class cls;
1495 unsigned int totalChain;
1496 unsigned int totalMissChain;
1497 unsigned int maxChain;
1498 unsigned int maxMissChain;
1499 unsigned int classCount;
1500 unsigned int negativeEntryCount;
1501 unsigned int cacheExpandCount;
1502 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1503 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1504 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1505 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1506 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1507 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1508 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1509 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1510 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1511 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1512 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1513 #ifdef OBJC_INSTRUMENTED
1514 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1515 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1516 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1517 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1518 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1519 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1520 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1521 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1522 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1523 #endif
1524
1525 printf ("Printing cache statistics\n");
1526
1527 // Outermost loop - iterate over all classes
1528 state = NXInitHashState (class_hash);
1529 classCount = 0;
1530 negativeEntryCount = 0;
1531 cacheExpandCount = 0;
1532 while (NXNextHashState (class_hash, &state, (void **) &cls))
1533 {
1534 // Tally classes
1535 classCount += 1;
1536
1537 // Control loop - do given class' cache, then its isa's cache
1538 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1539 {
1540 Cache cache;
1541 unsigned int mask;
1542 unsigned int log2Size;
1543 unsigned int entryCount;
1544
1545 // Select cache of interest
1546 cache = _class_getCache(isMeta ? ((id)cls)->isa : cls);
1547
1548 // Ignore empty cache... should we?
1549 if (_cache_isEmpty(cache))
1550 continue;
1551
1552 // Middle loop - do each entry in the given cache
1553 mask = cache->mask;
1554 entryCount = 0;
1555 totalChain = 0;
1556 totalMissChain = 0;
1557 maxChain = 0;
1558 maxMissChain = 0;
1559 for (index = 0; index < mask + 1; index += 1)
1560 {
1561 cache_entry **buckets;
1562 cache_entry *entry;
1563 unsigned int hash;
1564 unsigned int methodChain;
1565 unsigned int methodMissChain;
1566 unsigned int index2;
1567
1568 // If entry is invalid, the only item of
1569 // interest is that future insert hashes
1570 // to this entry can use it directly.
1571 buckets = (cache_entry **)cache->buckets;
1572 if (!buckets[index])
1573 {
1574 missChainCount[0] += 1;
1575 continue;
1576 }
1577
1578 entry = buckets[index];
1579
1580 // Tally valid entries
1581 entryCount += 1;
1582
1583 // Tally "forward::" entries
1584 if (entry->imp == _objc_msgForward_internal)
1585 negativeEntryCount += 1;
1586
1587 // Calculate search distance (chain length) for this method
1588 // The chain may wrap around to the beginning of the table.
1589 hash = CACHE_HASH(entry->name, mask);
1590 if (index >= hash) methodChain = index - hash;
1591 else methodChain = (mask+1) + index - hash;
1592
1593 // Tally chains of this length
1594 if (methodChain < MAX_CHAIN_SIZE)
1595 chainCount[methodChain] += 1;
1596
1597 // Keep sum of all chain lengths
1598 totalChain += methodChain;
1599
1600 // Record greatest chain length
1601 if (methodChain > maxChain)
1602 maxChain = methodChain;
1603
1604 // Calculate search distance for miss that hashes here
1605 index2 = index;
1606 while (buckets[index2])
1607 {
1608 index2 += 1;
1609 index2 &= mask;
1610 }
1611 methodMissChain = ((index2 - index) & mask);
1612
1613 // Tally miss chains of this length
1614 if (methodMissChain < MAX_CHAIN_SIZE)
1615 missChainCount[methodMissChain] += 1;
1616
1617 // Keep sum of all miss chain lengths in this class
1618 totalMissChain += methodMissChain;
1619
1620 // Record greatest miss chain length
1621 if (methodMissChain > maxMissChain)
1622 maxMissChain = methodMissChain;
1623 }
1624
1625 // Factor this cache into statistics about caches of the same
1626 // type and size (all caches are a power of two in size)
1627 log2Size = log2u (mask + 1);
1628 cacheCountBySize[isMeta][log2Size] += 1;
1629 totalEntriesBySize[isMeta][log2Size] += entryCount;
1630 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1631 maxEntriesBySize[isMeta][log2Size] = entryCount;
1632 totalChainBySize[isMeta][log2Size] += totalChain;
1633 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1634 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1635 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1636 if (maxChain > maxChainBySize[isMeta][log2Size])
1637 maxChainBySize[isMeta][log2Size] = maxChain;
1638 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1639 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1640 #ifdef OBJC_INSTRUMENTED
1641 {
1642 CacheInstrumentation *cacheData;
1643
1644 cacheData = CACHE_INSTRUMENTATION(cache);
1645 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1646 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1647 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1648 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1649 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1650 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1651 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1652 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1653 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1654 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1655 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1656 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1657 }
1658 #endif
1659 // Caches start with a power-of-two number of slots and grow by doubling, so
1660 // the difference in log2 sizes is the number of times this cache has expanded
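// (for example, assuming INIT_CACHE_SIZE_LOG2 is 2, i.e. a 4-slot initial
//  cache, a cache that now has 64 slots has expanded 4 times)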
1661 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1662 }
1663 }
1664
1665 {
1666 unsigned int cacheCountByType[2] = {0};
1667 unsigned int totalCacheCount = 0;
1668 unsigned int totalEntries = 0;
1669 unsigned int maxEntries = 0;
1670 unsigned int totalSlots = 0;
1671 #ifdef OBJC_INSTRUMENTED
1672 unsigned int totalHitCount = 0;
1673 unsigned int totalHitProbes = 0;
1674 unsigned int maxHitProbes = 0;
1675 unsigned int totalMissCount = 0;
1676 unsigned int totalMissProbes = 0;
1677 unsigned int maxMissProbes = 0;
1678 unsigned int totalFlushCount = 0;
1679 unsigned int totalFlushedEntries = 0;
1680 unsigned int maxFlushedEntries = 0;
1681 #endif
1682
1683 totalChain = 0;
1684 maxChain = 0;
1685 totalMissChain = 0;
1686 maxMissChain = 0;
1687
1688 // Sum information over all caches
1689 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1690 {
1691 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1692 {
1693 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1694 totalEntries += totalEntriesBySize[isMeta][index];
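// Caches in size bucket `index` each have (1 << index) slots.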
1695 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1696 totalChain += totalChainBySize[isMeta][index];
1697 if (maxEntriesBySize[isMeta][index] > maxEntries)
1698 maxEntries = maxEntriesBySize[isMeta][index];
1699 if (maxChainBySize[isMeta][index] > maxChain)
1700 maxChain = maxChainBySize[isMeta][index];
1701 totalMissChain += totalMissChainBySize[isMeta][index];
1702 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1703 maxMissChain = maxMissChainBySize[isMeta][index];
1704 #ifdef OBJC_INSTRUMENTED
1705 totalHitCount += hitCountBySize[isMeta][index];
1706 totalHitProbes += hitProbesBySize[isMeta][index];
1707 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1708 maxHitProbes = maxHitProbesBySize[isMeta][index];
1709 totalMissCount += missCountBySize[isMeta][index];
1710 totalMissProbes += missProbesBySize[isMeta][index];
1711 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1712 maxMissProbes = maxMissProbesBySize[isMeta][index];
1713 totalFlushCount += flushCountBySize[isMeta][index];
1714 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1715 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1716 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1717 #endif
1718 }
1719
1720 totalCacheCount += cacheCountByType[isMeta];
1721 }
1722
1723 // Log our findings
1724 printf ("There are %u classes\n", classCount);
1725
1726 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1727 {
1728 // Number of this type of class
1729 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1730 cacheCountByType[isMeta],
1731 isMeta ? "class" : "instance");
1732
1733 // Print header
1734 PrintCacheHeader ();
1735
1736 // Keep format consistent even if there are no caches of this kind
1737 if (cacheCountByType[isMeta] == 0)
1738 {
1739 printf ("(none)\n");
1740 continue;
1741 }
1742
1743 // Usage information by cache size
1744 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1745 {
1746 unsigned int cacheCount;
1747 unsigned int cacheSlotCount;
1748 unsigned int cacheEntryCount;
1749
1750 // Get number of caches of this type and size
1751 cacheCount = cacheCountBySize[isMeta][index];
1752 if (cacheCount == 0)
1753 continue;
1754
1755 // Get the cache slot count and the total number of valid entries
1756 cacheSlotCount = (1 << index);
1757 cacheEntryCount = totalEntriesBySize[isMeta][index];
1758
1759 // Give the analysis
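// (occupancy and hit-chain lengths are averaged per valid entry;
//  miss-chain lengths are averaged per slot, because a miss can hash
//  to any slot, occupied or not)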
1760 PrintCacheInfo (cacheSlotCount,
1761 cacheCount,
1762 cacheEntryCount,
1763 (float) cacheEntryCount / (float) cacheCount,
1764 maxEntriesBySize[isMeta][index],
1765 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1766 maxChainBySize[isMeta][index],
1767 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1768 maxMissChainBySize[isMeta][index]
1769 #ifdef OBJC_INSTRUMENTED
1770 , hitCountBySize[isMeta][index],
1771 hitCountBySize[isMeta][index] ?
1772 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1773 maxHitProbesBySize[isMeta][index],
1774 missCountBySize[isMeta][index],
1775 missCountBySize[isMeta][index] ?
1776 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1777 maxMissProbesBySize[isMeta][index],
1778 flushCountBySize[isMeta][index],
1779 flushCountBySize[isMeta][index] ?
1780 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1781 maxFlushedEntriesBySize[isMeta][index]
1782 #endif
1783 );
1784 }
1785 }
1786
1787 // Give overall numbers
1788 printf ("\nCumulative:\n");
1789 PrintCacheHeader ();
1790 PrintCacheInfo (totalSlots,
1791 totalCacheCount,
1792 totalEntries,
1793 (float) totalEntries / (float) totalCacheCount,
1794 maxEntries,
1795 (float) totalChain / (float) totalEntries,
1796 maxChain,
1797 (float) totalMissChain / (float) totalSlots,
1798 maxMissChain
1799 #ifdef OBJC_INSTRUMENTED
1800 , totalHitCount,
1801 totalHitCount ?
1802 (float) totalHitProbes / (float) totalHitCount : 0.0,
1803 maxHitProbes,
1804 totalMissCount,
1805 totalMissCount ?
1806 (float) totalMissProbes / (float) totalMissCount : 0.0,
1807 maxMissProbes,
1808 totalFlushCount,
1809 totalFlushCount ?
1810 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1811 maxFlushedEntries
1812 #endif
1813 );
1814
1815 printf ("\nNumber of \"forward::\" entries: %u\n", negativeEntryCount);
1816 printf ("Number of cache expansions: %u\n", cacheExpandCount);
1817 #ifdef OBJC_INSTRUMENTED
1818 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1819 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1820 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1821 LinearFlushCachesCount,
1822 LinearFlushCachesVisitedCount,
1823 LinearFlushCachesCount ?
1824 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1825 MaxLinearFlushCachesVisitedCount,
1826 LinearFlushCachesVisitedCount,
1827 1.0);
1828 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1829 NonlinearFlushCachesCount,
1830 NonlinearFlushCachesVisitedCount,
1831 NonlinearFlushCachesCount ?
1832 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1833 MaxNonlinearFlushCachesVisitedCount,
1834 NonlinearFlushCachesClassCount,
1835 NonlinearFlushCachesClassCount ?
1836 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1837 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1838 LinearFlushCachesCount + NonlinearFlushCachesCount,
1839 IdealFlushCachesCount,
1840 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1841 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1842 MaxIdealFlushCachesCount,
1843 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1844 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1845 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1846
1847 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1848 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1849 #endif
1850
1851 #if 0
1852 printf ("\nLookup chains:");
1853 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1854 {
1855 if (chainCount[index] != 0)
1856 printf (" %u:%u", index, chainCount[index]);
1857 }
1858
1859 printf ("\nMiss chains:");
1860 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1861 {
1862 if (missChainCount[index] != 0)
1863 printf (" %u:%u", index, missChainCount[index]);
1864 }
1865
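// This estimate counts, for each cache, the header (struct objc_cache minus
// its one embedded bucket pointer), one bucket pointer per slot, plus one
// heap-allocated cache_entry per "forward::" entry; nothing else is
// attributed to the cache itself.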
1866 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1867 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1868 totalSlots * sizeof(cache_entry *) +
1869 negativeEntryCount * sizeof(cache_entry));
1870 #endif
1871 }
1872 }
1873
1874 #endif