]> git.saurik.com Git - apple/objc4.git/blob - runtime/objc-cache-old.mm
objc4-818.2.tar.gz
[apple/objc4.git] / runtime / objc-cache-old.mm
1 /*
2 * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /***********************************************************************
25 * objc-cache.m
26 * Method cache management
27 * Cache flushing
28 * Cache garbage collection
29 * Cache instrumentation
30 * Dedicated allocator for large caches
31 **********************************************************************/
32
33
34 /***********************************************************************
35 * Method cache locking (GrP 2001-1-14)
36 *
37 * For speed, objc_msgSend does not acquire any locks when it reads
38 * method caches. Instead, all cache changes are performed so that any
39 * objc_msgSend running concurrently with the cache mutator will not
40 * crash or hang or get an incorrect result from the cache.
41 *
42 * When cache memory becomes unused (e.g. the old cache after cache
43 * expansion), it is not immediately freed, because a concurrent
44 * objc_msgSend could still be using it. Instead, the memory is
45 * disconnected from the data structures and placed on a garbage list.
46 * The memory is now only accessible to instances of objc_msgSend that
47 * were running when the memory was disconnected; any further calls to
48 * objc_msgSend will not see the garbage memory because the other data
49 * structures don't point to it anymore. The collecting_in_critical
50 * function checks the PC of all threads and returns FALSE when all threads
51 * are found to be outside objc_msgSend. This means any call to objc_msgSend
52 * that could have had access to the garbage has finished or moved past the
53 * cache lookup stage, so it is safe to free the memory.
54 *
55 * All functions that modify cache data or structures must acquire the
56 * cacheUpdateLock to prevent interference from concurrent modifications.
57 * The function that frees cache garbage must acquire the cacheUpdateLock
58 * and use collecting_in_critical() to flush out cache readers.
59 * The cacheUpdateLock is also used to protect the custom allocator used
60 * for large method cache blocks.
61 *
62 * Cache readers (PC-checked by collecting_in_critical())
63 * objc_msgSend*
64 * _cache_getImp
65 * _cache_getMethod
66 *
67 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
68 * _cache_fill (acquires lock)
69 * _cache_expand (only called from cache_fill)
70 * _cache_create (only called from cache_expand)
71 * bcopy (only called from instrumented cache_expand)
72 * flush_caches (acquires lock)
73 * _cache_flush (only called from cache_fill and flush_caches)
74 * _cache_collect_free (only called from cache_expand and cache_flush)
75 *
76 * UNPROTECTED cache readers (NOT thread-safe; used for debug info only)
77 * _cache_print
78 * _class_printMethodCaches
79 * _class_printDuplicateCacheEntries
80 * _class_printMethodCacheStatistics
81 *
82 * _class_lookupMethodAndLoadCache is a special case. It may read a
83 * method triplet out of one cache and store it in another cache. This
84 * is unsafe if the method triplet is a forward:: entry, because the
85 * triplet itself could be freed unless _class_lookupMethodAndLoadCache
86 * were PC-checked or used a lock. Additionally, storing the method
87 * triplet in both caches would result in double-freeing if both caches
88 * were flushed or expanded. The solution is for _cache_getMethod to
89 * ignore all entries whose implementation is _objc_msgForward_impcache,
90 * so _class_lookupMethodAndLoadCache cannot look at a forward:: entry
91 * unsafely or place it in multiple caches.
92 ***********************************************************************/
93
94 #if !__OBJC2__
95
96 #include "objc-private.h"
97 #include "objc-cache-old.h"
98 #include "hashtable2.h"
99
100 typedef struct {
101 SEL name; // same layout as struct old_method
102 void *unused;
103 IMP imp; // same layout as struct old_method
104 } cache_entry;
105
106
107 /* When _class_slow_grow is non-zero, any given cache is actually grown
108 * only on the odd-numbered times it becomes full; on the even-numbered
109 * times, it is simply emptied and re-used. When this flag is zero,
110 * caches are grown every time. */
111 static const int _class_slow_grow = 1;
112
113 /* For min cache size: clear_cache=1, slow_grow=1
114 For max cache size: clear_cache=0, slow_grow=0 */
115
116 /* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
117 enum {
118 INIT_CACHE_SIZE_LOG2 = 2,
119 INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2)
120 };
121
122
123 /* Amount of space required for `count` hash table buckets, knowing that
124 * one entry is embedded in the cache structure itself. */
125 #define TABLE_SIZE(count) ((count - 1) * sizeof(cache_entry *))
126
127
128 #if !TARGET_OS_WIN32
129 # define CACHE_ALLOCATOR
130 #endif
131
132 /* Custom cache allocator parameters.
133 * CACHE_REGION_SIZE must be a multiple of CACHE_QUANTUM. */
134 #define CACHE_ALLOCATOR_MIN 512
135 #define CACHE_QUANTUM (CACHE_ALLOCATOR_MIN+sizeof(struct objc_cache)-sizeof(cache_entry*))
136 #define CACHE_REGION_SIZE ((128*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
137 // #define CACHE_REGION_SIZE ((256*1024 / CACHE_QUANTUM) * CACHE_QUANTUM)
138
139 static uintptr_t cache_allocator_mask_for_size(size_t size)
140 {
141 return (size - sizeof(struct objc_cache)) / sizeof(cache_entry *);
142 }
143
144 static size_t cache_allocator_size_for_mask(uintptr_t mask)
145 {
146 size_t requested = sizeof(struct objc_cache) + TABLE_SIZE(mask+1);
147 size_t actual = CACHE_QUANTUM;
148 while (actual < requested) actual += CACHE_QUANTUM;
149 return actual;
150 }
151
152
153 /* Cache instrumentation data. Immediately follows the cache block itself. */
154 #ifdef OBJC_INSTRUMENTED
155 typedef struct
156 {
157 unsigned int hitCount; // cache lookup success tally
158 unsigned int hitProbes; // sum entries checked to hit
159 unsigned int maxHitProbes; // max entries checked to hit
160 unsigned int missCount; // cache lookup no-find tally
161 unsigned int missProbes; // sum entries checked to miss
162 unsigned int maxMissProbes; // max entries checked to miss
163 unsigned int flushCount; // cache flush tally
164 unsigned int flushedEntries; // sum cache entries flushed
165 unsigned int maxFlushedEntries; // max cache entries flushed
166 } CacheInstrumentation;
167
168 #define CACHE_INSTRUMENTATION(cache) (CacheInstrumentation *) &cache->buckets[cache->mask + 1];
169 #endif
170
171 /* Cache filling and flushing instrumentation */
172
173 static int totalCacheFills = 0;
174
175 #ifdef OBJC_INSTRUMENTED
176 unsigned int LinearFlushCachesCount = 0;
177 unsigned int LinearFlushCachesVisitedCount = 0;
178 unsigned int MaxLinearFlushCachesVisitedCount = 0;
179 unsigned int NonlinearFlushCachesCount = 0;
180 unsigned int NonlinearFlushCachesClassCount = 0;
181 unsigned int NonlinearFlushCachesVisitedCount = 0;
182 unsigned int MaxNonlinearFlushCachesVisitedCount = 0;
183 unsigned int IdealFlushCachesCount = 0;
184 unsigned int MaxIdealFlushCachesCount = 0;
185 #endif
186
187
188 /***********************************************************************
189 * A static empty cache. All classes initially point at this cache.
190 * When the first message is sent it misses in the cache, and when
191 * the cache is grown it checks for this case and uses malloc rather
192 * than realloc. This avoids the need to check for NULL caches in the
193 * messenger.
194 ***********************************************************************/
195
196 struct objc_cache _objc_empty_cache =
197 {
198 0, // mask
199 0, // occupied
200 { NULL } // buckets
201 };
202 #ifdef OBJC_INSTRUMENTED
203 CacheInstrumentation emptyCacheInstrumentation = {0};
204 #endif
205
206
207 /* Local prototypes */
208
209 static bool _cache_isEmpty(Cache cache);
210 static Cache _cache_malloc(uintptr_t slotCount);
211 static Cache _cache_create(Class cls);
212 static Cache _cache_expand(Class cls);
213
214 static int _collecting_in_critical(void);
215 static void _garbage_make_room(void);
216 static void _cache_collect_free(void *data, size_t size);
217
218 #if defined(CACHE_ALLOCATOR)
219 static bool cache_allocator_is_block(void *block);
220 static Cache cache_allocator_calloc(size_t size);
221 static void cache_allocator_free(void *block);
222 #endif
223
224 /***********************************************************************
225 * Cache statistics for OBJC_PRINT_CACHE_SETUP
226 **********************************************************************/
227 static unsigned int cache_counts[16];
228 static size_t cache_allocations;
229 static size_t cache_collections;
230 static size_t cache_allocator_regions;
231
232 static size_t log2u(size_t x)
233 {
234 unsigned int log;
235
236 log = 0;
237 while (x >>= 1)
238 log += 1;
239
240 return log;
241 }
242
243
244 /***********************************************************************
245 * _cache_isEmpty.
246 * Returns YES if the given cache is some empty cache.
247 * Empty caches should never be allocated on the heap.
248 **********************************************************************/
249 static bool _cache_isEmpty(Cache cache)
250 {
251 return (cache == NULL || cache == (Cache)&_objc_empty_cache || cache->mask == 0);
252 }
253
254
255 /***********************************************************************
256 * _cache_malloc.
257 *
258 * Called from _cache_create() and cache_expand()
259 * Cache locks: cacheUpdateLock must be held by the caller.
260 **********************************************************************/
261 static Cache _cache_malloc(uintptr_t slotCount)
262 {
263 Cache new_cache;
264 size_t size;
265
266 cacheUpdateLock.assertLocked();
267
268 // Allocate table (why not check for failure?)
269 size = sizeof(struct objc_cache) + TABLE_SIZE(slotCount);
270 #if defined(OBJC_INSTRUMENTED)
271 // Custom cache allocator can't handle instrumentation.
272 size += sizeof(CacheInstrumentation);
273 new_cache = calloc(size, 1);
274 new_cache->mask = slotCount - 1;
275 #elif !defined(CACHE_ALLOCATOR)
276 // fixme cache allocator implementation isn't 64-bit clean
277 new_cache = calloc(size, 1);
278 new_cache->mask = (unsigned int)(slotCount - 1);
279 #else
280 if (size < CACHE_ALLOCATOR_MIN) {
281 new_cache = (Cache)calloc(size, 1);
282 new_cache->mask = slotCount - 1;
283 // occupied and buckets and instrumentation are all zero
284 } else {
285 new_cache = cache_allocator_calloc(size);
286 // mask is already set
287 // occupied and buckets and instrumentation are all zero
288 }
289 #endif
290
291 if (PrintCaches) {
292 size_t bucket = log2u(slotCount);
293 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
294 cache_counts[bucket]++;
295 }
296 cache_allocations++;
297 }
298
299 return new_cache;
300 }
301
302 /***********************************************************************
303 * _cache_free_block.
304 *
305 * Called from _cache_free() and _cache_collect_free().
306 * block may be a cache or a forward:: entry.
307 * If block is a cache, forward:: entries it points to will NOT be freed.
308 * Cache locks: cacheUpdateLock must be held by the caller.
309 **********************************************************************/
310 static inline int isPowerOf2(unsigned long l) { return 1 == __builtin_popcountl(l); }
311 static void _cache_free_block(void *block)
312 {
313 cacheUpdateLock.assertLocked();
314
315 #if !TARGET_OS_WIN32
316 if (PrintCaches) {
317 Cache cache = (Cache)block;
318 size_t slotCount = cache->mask + 1;
319 if (isPowerOf2(slotCount)) {
320 size_t bucket = log2u(slotCount);
321 if (bucket < sizeof(cache_counts) / sizeof(cache_counts[0])) {
322 cache_counts[bucket]--;
323 }
324 }
325 }
326 #endif
327
328 #if defined(CACHE_ALLOCATOR)
329 if (cache_allocator_is_block(block)) {
330 cache_allocator_free(block);
331 } else
332 #endif
333 {
334 free(block);
335 }
336 }
337
338
339 /***********************************************************************
340 * _cache_free.
341 *
342 * Called from _objc_remove_classes_in_image().
343 * forward:: entries in the cache ARE freed.
344 * Cache locks: cacheUpdateLock must NOT be held by the caller.
345 **********************************************************************/
346 void _cache_free(Cache cache)
347 {
348 unsigned int i;
349
350 mutex_locker_t lock(cacheUpdateLock);
351
352 for (i = 0; i < cache->mask + 1; i++) {
353 cache_entry *entry = (cache_entry *)cache->buckets[i];
354 if (entry && entry->imp == _objc_msgForward_impcache) {
355 _cache_free_block(entry);
356 }
357 }
358
359 _cache_free_block(cache);
360 }
361
362
363 /***********************************************************************
364 * _cache_create.
365 *
366 * Called from _cache_expand().
367 * Cache locks: cacheUpdateLock must be held by the caller.
368 **********************************************************************/
369 static Cache _cache_create(Class cls)
370 {
371 Cache new_cache;
372
373 cacheUpdateLock.assertLocked();
374
375 // Allocate new cache block
376 new_cache = _cache_malloc(INIT_CACHE_SIZE);
377
378 // Install the cache
379 cls->cache = new_cache;
380
381 // Clear the grow flag so that we will re-use the current storage,
382 // rather than actually grow the cache, when expanding the cache
383 // for the first time
384 if (_class_slow_grow) {
385 cls->setShouldGrowCache(false);
386 }
387
388 // Return our creation
389 return new_cache;
390 }
391
392
393 /***********************************************************************
394 * _cache_expand.
395 *
396 * Called from _cache_fill ()
397 * Cache locks: cacheUpdateLock must be held by the caller.
398 **********************************************************************/
399 static Cache _cache_expand(Class cls)
400 {
401 Cache old_cache;
402 Cache new_cache;
403 uintptr_t slotCount;
404 uintptr_t index;
405
406 cacheUpdateLock.assertLocked();
407
408 // First growth goes from empty cache to a real one
409 old_cache = cls->cache;
410 if (_cache_isEmpty(old_cache))
411 return _cache_create (cls);
412
413 if (_class_slow_grow) {
414 // Cache grows every other time only.
415 if (cls->shouldGrowCache()) {
416 // Grow the cache this time. Don't grow next time.
417 cls->setShouldGrowCache(false);
418 }
419 else {
420 // Reuse the current cache storage this time. Do grow next time.
421 cls->setShouldGrowCache(true);
422
423 // Clear the valid-entry counter
424 old_cache->occupied = 0;
425
426 // Invalidate all the cache entries
427 for (index = 0; index < old_cache->mask + 1; index += 1)
428 {
429 // Remember what this entry was, so we can possibly
430 // deallocate it after the bucket has been invalidated
431 cache_entry *oldEntry = (cache_entry *)old_cache->buckets[index];
432
433 // Skip invalid entry
434 if (!oldEntry)
435 continue;
436
437 // Invalidate this entry
438 old_cache->buckets[index] = NULL;
439
440 // Deallocate "forward::" entry
441 if (oldEntry->imp == _objc_msgForward_impcache) {
442 _cache_collect_free (oldEntry, sizeof(cache_entry));
443 }
444 }
445
446 // Return the same old cache, freshly emptied
447 return old_cache;
448 }
449 }
450
451 // Double the cache size
452 slotCount = (old_cache->mask + 1) << 1;
453
454 new_cache = _cache_malloc(slotCount);
455
456 #ifdef OBJC_INSTRUMENTED
457 // Propagate the instrumentation data
458 {
459 CacheInstrumentation *oldCacheData;
460 CacheInstrumentation *newCacheData;
461
462 oldCacheData = CACHE_INSTRUMENTATION(old_cache);
463 newCacheData = CACHE_INSTRUMENTATION(new_cache);
464 bcopy ((const char *)oldCacheData, (char *)newCacheData, sizeof(CacheInstrumentation));
465 }
466 #endif
467
468 // Deallocate "forward::" entries from the old cache
469 for (index = 0; index < old_cache->mask + 1; index++) {
470 cache_entry *entry = (cache_entry *)old_cache->buckets[index];
471 if (entry && entry->imp == _objc_msgForward_impcache) {
472 _cache_collect_free (entry, sizeof(cache_entry));
473 }
474 }
475
476 // Install new cache
477 cls->cache = new_cache;
478
479 // Deallocate old cache, try freeing all the garbage
480 _cache_collect_free (old_cache, old_cache->mask * sizeof(cache_entry *));
481 _cache_collect(false);
482
483 return new_cache;
484 }
485
486
487 /***********************************************************************
488 * _cache_fill. Add the specified method to the specified class' cache.
489 * Returns NO if the cache entry wasn't added: cache was busy,
490 * class is still being initialized, new entry is a duplicate.
491 *
492 * Called only from _class_lookupMethodAndLoadCache and
493 * class_respondsToMethod and _cache_addForwardEntry.
494 *
495 * Cache locks: cacheUpdateLock must not be held.
496 **********************************************************************/
497 bool _cache_fill(Class cls, Method smt, SEL sel)
498 {
499 uintptr_t newOccupied;
500 uintptr_t index;
501 cache_entry **buckets;
502 cache_entry *entry;
503 Cache cache;
504
505 cacheUpdateLock.assertUnlocked();
506
507 // Never cache before +initialize is done
508 if (!cls->isInitialized()) {
509 return NO;
510 }
511
512 // Keep tally of cache additions
513 totalCacheFills += 1;
514
515 mutex_locker_t lock(cacheUpdateLock);
516
517 entry = (cache_entry *)smt;
518
519 cache = cls->cache;
520
521 // Make sure the entry wasn't added to the cache by some other thread
522 // before we grabbed the cacheUpdateLock.
523 // Don't use _cache_getMethod() because _cache_getMethod() doesn't
524 // return forward:: entries.
525 if (_cache_getImp(cls, sel)) {
526 return NO; // entry is already cached, didn't add new one
527 }
528
529 // Use the cache as-is if it is less than 3/4 full
530 newOccupied = cache->occupied + 1;
531 if ((newOccupied * 4) <= (cache->mask + 1) * 3) {
532 // Cache is less than 3/4 full.
533 cache->occupied = (unsigned int)newOccupied;
534 } else {
535 // Cache is too full. Expand it.
536 cache = _cache_expand (cls);
537
538 // Account for the addition
539 cache->occupied += 1;
540 }
541
542 // Scan for the first unused slot and insert there.
543 // There is guaranteed to be an empty slot because the
544 // minimum size is 4 and we resized at 3/4 full.
545 buckets = (cache_entry **)cache->buckets;
546 for (index = CACHE_HASH(sel, cache->mask);
547 buckets[index] != NULL;
548 index = (index+1) & cache->mask)
549 {
550 // empty
551 }
552 buckets[index] = entry;
553
554 return YES; // successfully added new cache entry
555 }
556
557
558 /***********************************************************************
559 * _cache_addForwardEntry
560 * Add a forward:: entry for the given selector to cls's method cache.
561 * Does nothing if the cache addition fails for any reason.
562 * Called from class_respondsToMethod and _class_lookupMethodAndLoadCache.
563 * Cache locks: cacheUpdateLock must not be held.
564 **********************************************************************/
565 void _cache_addForwardEntry(Class cls, SEL sel)
566 {
567 cache_entry *smt;
568
569 smt = (cache_entry *)malloc(sizeof(cache_entry));
570 smt->name = sel;
571 smt->imp = _objc_msgForward_impcache;
572 if (! _cache_fill(cls, (Method)smt, sel)) { // fixme hack
573 // Entry not added to cache. Don't leak the method struct.
574 free(smt);
575 }
576 }
577
578
579 /***********************************************************************
580 * _cache_flush. Invalidate all valid entries in the given class' cache.
581 *
582 * Called from flush_caches() and _cache_fill()
583 * Cache locks: cacheUpdateLock must be held by the caller.
584 **********************************************************************/
585 void _cache_flush(Class cls)
586 {
587 Cache cache;
588 unsigned int index;
589
590 cacheUpdateLock.assertLocked();
591
592 // Locate cache. Ignore unused cache.
593 cache = cls->cache;
594 if (_cache_isEmpty(cache)) return;
595
596 #ifdef OBJC_INSTRUMENTED
597 {
598 CacheInstrumentation *cacheData;
599
600 // Tally this flush
601 cacheData = CACHE_INSTRUMENTATION(cache);
602 cacheData->flushCount += 1;
603 cacheData->flushedEntries += cache->occupied;
604 if (cache->occupied > cacheData->maxFlushedEntries)
605 cacheData->maxFlushedEntries = cache->occupied;
606 }
607 #endif
608
609 // Traverse the cache
610 for (index = 0; index <= cache->mask; index += 1)
611 {
612 // Remember what this entry was, so we can possibly
613 // deallocate it after the bucket has been invalidated
614 cache_entry *oldEntry = (cache_entry *)cache->buckets[index];
615
616 // Invalidate this entry
617 cache->buckets[index] = NULL;
618
619 // Deallocate "forward::" entry
620 if (oldEntry && oldEntry->imp == _objc_msgForward_impcache)
621 _cache_collect_free (oldEntry, sizeof(cache_entry));
622 }
623
624 // Clear the valid-entry counter
625 cache->occupied = 0;
626 }
627
628
629 /***********************************************************************
630 * flush_cache. Flushes the instance method cache for class cls only.
631 * Use flush_caches() if cls might have in-use subclasses.
632 **********************************************************************/
633 void flush_cache(Class cls)
634 {
635 if (cls) {
636 mutex_locker_t lock(cacheUpdateLock);
637 _cache_flush(cls);
638 }
639 }
640
641
642 /***********************************************************************
643 * cache collection.
644 **********************************************************************/
645
646 #if !TARGET_OS_WIN32
647
648 // A sentinel (magic value) to report bad thread_get_state status.
649 // Must not be a valid PC.
650 // Must not be zero - thread_get_state() on a new thread returns PC == 0.
651 #define PC_SENTINEL 1
652
653 // UNIX03 compliance hack (4508809)
654 #if !__DARWIN_UNIX03
655 #define __srr0 srr0
656 #define __eip eip
657 #endif
658
659 static uintptr_t _get_pc_for_thread(thread_t thread)
660 #if defined(__i386__)
661 {
662 i386_thread_state_t state;
663 unsigned int count = i386_THREAD_STATE_COUNT;
664 kern_return_t okay = thread_get_state (thread, i386_THREAD_STATE, (thread_state_t)&state, &count);
665 return (okay == KERN_SUCCESS) ? state.__eip : PC_SENTINEL;
666 }
667 #elif defined(__x86_64__)
668 {
669 x86_thread_state64_t state;
670 unsigned int count = x86_THREAD_STATE64_COUNT;
671 kern_return_t okay = thread_get_state (thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
672 return (okay == KERN_SUCCESS) ? state.__rip : PC_SENTINEL;
673 }
674 #elif defined(__arm__)
675 {
676 arm_thread_state_t state;
677 unsigned int count = ARM_THREAD_STATE_COUNT;
678 kern_return_t okay = thread_get_state (thread, ARM_THREAD_STATE, (thread_state_t)&state, &count);
679 return (okay == KERN_SUCCESS) ? state.__pc : PC_SENTINEL;
680 }
681 #else
682 {
683 #error _get_pc_for_thread () not implemented for this architecture
684 }
685 #endif
686
687 #endif
688
689 /***********************************************************************
690 * _collecting_in_critical.
691 * Returns TRUE if some thread is currently executing a cache-reading
692 * function. Collection of cache garbage is not allowed when a cache-
693 * reading function is in progress because it might still be using
694 * the garbage memory.
695 **********************************************************************/
696 typedef struct {
697 uint64_t location;
698 unsigned short length;
699 unsigned short recovery_offs;
700 unsigned int flags;
701 } task_restartable_range_t;
702
703 extern "C" task_restartable_range_t objc_restartableRanges[];
704
705 static int _collecting_in_critical(void)
706 {
707 #if TARGET_OS_WIN32
708 return TRUE;
709 #else
710 thread_act_port_array_t threads;
711 unsigned number;
712 unsigned count;
713 kern_return_t ret;
714 int result;
715
716 mach_port_t mythread = pthread_mach_thread_np(objc_thread_self());
717
718 // Get a list of all the threads in the current task
719 ret = task_threads (mach_task_self (), &threads, &number);
720 if (ret != KERN_SUCCESS)
721 {
722 _objc_fatal("task_thread failed (result %d)\n", ret);
723 }
724
725 // Check whether any thread is in the cache lookup code
726 result = FALSE;
727 for (count = 0; count < number; count++)
728 {
729 int region;
730 uintptr_t pc;
731
732 // Don't bother checking ourselves
733 if (threads[count] == mythread)
734 continue;
735
736 // Find out where thread is executing
737 pc = _get_pc_for_thread (threads[count]);
738
739 // Check for bad status, and if so, assume the worse (can't collect)
740 if (pc == PC_SENTINEL)
741 {
742 result = TRUE;
743 goto done;
744 }
745
746 // Check whether it is in the cache lookup code
747 for (region = 0; objc_restartableRanges[region].location != 0; region++)
748 {
749 uint32_t loc = (uint32_t)objc_restartableRanges[region].location;
750 if ((pc > loc) &&
751 (pc - loc) < objc_restartableRanges[region].length)
752 {
753 result = TRUE;
754 goto done;
755 }
756 }
757 }
758
759 done:
760 // Deallocate the port rights for the threads
761 for (count = 0; count < number; count++) {
762 mach_port_deallocate(mach_task_self (), threads[count]);
763 }
764
765 // Deallocate the thread list
766 vm_deallocate (mach_task_self (), (vm_address_t) threads, sizeof(threads[0]) * number);
767
768 // Return our finding
769 return result;
770 #endif
771 }
772
773
774 /***********************************************************************
775 * _garbage_make_room. Ensure that there is enough room for at least
776 * one more ref in the garbage.
777 **********************************************************************/
778
779 // amount of memory represented by all refs in the garbage
780 static size_t garbage_byte_size = 0;
781
782 // do not empty the garbage until garbage_byte_size gets at least this big
783 static size_t garbage_threshold = 1024;
784
785 // table of refs to free
786 static void **garbage_refs = 0;
787
788 // current number of refs in garbage_refs
789 static size_t garbage_count = 0;
790
791 // capacity of current garbage_refs
792 static size_t garbage_max = 0;
793
794 // capacity of initial garbage_refs
795 enum {
796 INIT_GARBAGE_COUNT = 128
797 };
798
799 static void _garbage_make_room(void)
800 {
801 static int first = 1;
802
803 // Create the collection table the first time it is needed
804 if (first)
805 {
806 first = 0;
807 garbage_refs = (void**)
808 malloc(INIT_GARBAGE_COUNT * sizeof(void *));
809 garbage_max = INIT_GARBAGE_COUNT;
810 }
811
812 // Double the table if it is full
813 else if (garbage_count == garbage_max)
814 {
815 garbage_refs = (void**)
816 realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
817 garbage_max *= 2;
818 }
819 }
820
821
822 /***********************************************************************
823 * _cache_collect_free. Add the specified malloc'd memory to the list
824 * of them to free at some later point.
825 * size is used for the collection threshold. It does not have to be
826 * precisely the block's size.
827 * Cache locks: cacheUpdateLock must be held by the caller.
828 **********************************************************************/
829 static void _cache_collect_free(void *data, size_t size)
830 {
831 cacheUpdateLock.assertLocked();
832
833 _garbage_make_room ();
834 garbage_byte_size += size;
835 garbage_refs[garbage_count++] = data;
836 }
837
838
839 /***********************************************************************
840 * _cache_collect. Try to free accumulated dead caches.
841 * collectALot tries harder to free memory.
842 * Cache locks: cacheUpdateLock must be held by the caller.
843 **********************************************************************/
844 void _cache_collect(bool collectALot)
845 {
846 cacheUpdateLock.assertLocked();
847
848 // Done if the garbage is not full
849 if (garbage_byte_size < garbage_threshold && !collectALot) {
850 return;
851 }
852
853 // Synchronize collection with objc_msgSend and other cache readers
854 if (!collectALot) {
855 if (_collecting_in_critical ()) {
856 // objc_msgSend (or other cache reader) is currently looking in
857 // the cache and might still be using some garbage.
858 if (PrintCaches) {
859 _objc_inform ("CACHES: not collecting; "
860 "objc_msgSend in progress");
861 }
862 return;
863 }
864 }
865 else {
866 // No excuses.
867 while (_collecting_in_critical())
868 ;
869 }
870
871 // No cache readers in progress - garbage is now deletable
872
873 // Log our progress
874 if (PrintCaches) {
875 cache_collections++;
876 _objc_inform ("CACHES: COLLECTING %zu bytes (%zu regions, %zu allocations, %zu collections)", garbage_byte_size, cache_allocator_regions, cache_allocations, cache_collections);
877 }
878
879 // Dispose all refs now in the garbage
880 while (garbage_count--) {
881 _cache_free_block(garbage_refs[garbage_count]);
882 }
883
884 // Clear the garbage count and total size indicator
885 garbage_count = 0;
886 garbage_byte_size = 0;
887
888 if (PrintCaches) {
889 size_t i;
890 size_t total = 0;
891 size_t ideal_total = 0;
892 size_t malloc_total = 0;
893 size_t local_total = 0;
894
895 for (i = 0; i < sizeof(cache_counts) / sizeof(cache_counts[0]); i++) {
896 int count = cache_counts[i];
897 int slots = 1 << i;
898 size_t size = sizeof(struct objc_cache) + TABLE_SIZE(slots);
899 size_t ideal = size;
900 #if TARGET_OS_WIN32
901 size_t malloc = size;
902 #else
903 size_t malloc = malloc_good_size(size);
904 #endif
905 size_t local = size < CACHE_ALLOCATOR_MIN ? malloc : cache_allocator_size_for_mask(cache_allocator_mask_for_size(size));
906
907 if (!count) continue;
908
909 _objc_inform("CACHES: %4d slots: %4d caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", slots, count, ideal*count, malloc*count, local*count, malloc*count-ideal*count, local*count-ideal*count);
910
911 total += count;
912 ideal_total += ideal*count;
913 malloc_total += malloc*count;
914 local_total += local*count;
915 }
916
917 _objc_inform("CACHES: total: %4zu caches, %6zu / %6zu / %6zu bytes ideal/malloc/local, %6zu / %6zu bytes wasted malloc/local", total, ideal_total, malloc_total, local_total, malloc_total-ideal_total, local_total-ideal_total);
918 }
919 }
920
921
922
923
924
925 #if defined(CACHE_ALLOCATOR)
926
927 /***********************************************************************
928 * Custom method cache allocator.
929 * Method cache block sizes are 2^slots+2 words, which is a pessimal
930 * case for the system allocator. It wastes 504 bytes per cache block
931 * with 128 or more slots, which adds up to tens of KB for an AppKit process.
932 * To save memory, the custom cache allocator below is used.
933 *
934 * The cache allocator uses 128 KB allocation regions. Few processes will
935 * require a second region. Within a region, allocation is address-ordered
936 * first fit.
937 *
938 * The cache allocator uses a quantum of 520.
939 * Cache block ideal sizes: 520, 1032, 2056, 4104
940 * Cache allocator sizes: 520, 1040, 2080, 4160
941 *
942 * Because all blocks are known to be genuine method caches, the ordinary
943 * cache->mask and cache->occupied fields are used as block headers.
944 * No out-of-band headers are maintained. The number of blocks will
945 * almost always be fewer than 200, so for simplicity there is no free
946 * list or other optimization.
947 *
948 * Block in use: mask != 0, occupied != -1 (mask indicates block size)
949 * Block free: mask != 0, occupied == -1 (mask is precisely block size)
950 *
951 * No cache allocator functions take any locks. Instead, the caller
952 * must hold the cacheUpdateLock.
953 *
954 * fixme with 128 KB regions and 520 B min block size, an allocation
955 * bitmap would be only 32 bytes - better than free list?
956 **********************************************************************/
957
958 typedef struct cache_allocator_block {
959 uintptr_t size;
960 uintptr_t state;
961 struct cache_allocator_block *nextFree;
962 } cache_allocator_block;
963
964 typedef struct cache_allocator_region {
965 cache_allocator_block *start;
966 cache_allocator_block *end; // first non-block address
967 cache_allocator_block *freeList;
968 struct cache_allocator_region *next;
969 } cache_allocator_region;
970
971 static cache_allocator_region *cacheRegion = NULL;
972
973
974 /***********************************************************************
975 * cache_allocator_add_region
976 * Allocates and returns a new region that can hold at least size
977 * bytes of large method caches.
978 * The actual size will be rounded up to a CACHE_QUANTUM boundary,
979 * with a minimum of CACHE_REGION_SIZE.
980 * The new region is lowest-priority for new allocations. Callers that
981 * know the other regions are already full should allocate directly
982 * into the returned region.
983 **********************************************************************/
984 static cache_allocator_region *cache_allocator_add_region(size_t size)
985 {
986 vm_address_t addr;
987 cache_allocator_block *b;
988 cache_allocator_region **rgnP;
989 cache_allocator_region *newRegion = (cache_allocator_region *)
990 calloc(1, sizeof(cache_allocator_region));
991
992 // Round size up to quantum boundary, and apply the minimum size.
993 size += CACHE_QUANTUM - (size % CACHE_QUANTUM);
994 if (size < CACHE_REGION_SIZE) size = CACHE_REGION_SIZE;
995
996 // Allocate the region
997 addr = (vm_address_t)calloc(size, 1);
998 newRegion->start = (cache_allocator_block *)addr;
999 newRegion->end = (cache_allocator_block *)(addr + size);
1000
1001 // Mark the first block: free and covers the entire region
1002 b = newRegion->start;
1003 b->size = size;
1004 b->state = (uintptr_t)-1;
1005 b->nextFree = NULL;
1006 newRegion->freeList = b;
1007
1008 // Add to end of the linked list of regions.
1009 // Other regions should be re-used before this one is touched.
1010 newRegion->next = NULL;
1011 rgnP = &cacheRegion;
1012 while (*rgnP) {
1013 rgnP = &(**rgnP).next;
1014 }
1015 *rgnP = newRegion;
1016
1017 cache_allocator_regions++;
1018
1019 return newRegion;
1020 }
1021
1022
1023 /***********************************************************************
1024 * cache_allocator_coalesce
1025 * Attempts to coalesce a free block with the single free block following
1026 * it in the free list, if any.
1027 **********************************************************************/
1028 static void cache_allocator_coalesce(cache_allocator_block *block)
1029 {
1030 if (block->size + (uintptr_t)block == (uintptr_t)block->nextFree) {
1031 block->size += block->nextFree->size;
1032 block->nextFree = block->nextFree->nextFree;
1033 }
1034 }
1035
1036
1037 /***********************************************************************
1038 * cache_region_calloc
1039 * Attempt to allocate a size-byte block in the given region.
1040 * Allocation is first-fit. The free list is already fully coalesced.
1041 * Returns NULL if there is not enough room in the region for the block.
1042 **********************************************************************/
1043 static void *cache_region_calloc(cache_allocator_region *rgn, size_t size)
1044 {
1045 cache_allocator_block **blockP;
1046 uintptr_t mask;
1047
1048 // Save mask for allocated block, then round size
1049 // up to CACHE_QUANTUM boundary
1050 mask = cache_allocator_mask_for_size(size);
1051 size = cache_allocator_size_for_mask(mask);
1052
1053 // Search the free list for a sufficiently large free block.
1054
1055 for (blockP = &rgn->freeList;
1056 *blockP != NULL;
1057 blockP = &(**blockP).nextFree)
1058 {
1059 cache_allocator_block *block = *blockP;
1060 if (block->size < size) continue; // not big enough
1061
1062 // block is now big enough. Allocate from it.
1063
1064 // Slice off unneeded fragment of block, if any,
1065 // and reconnect the free list around block.
1066 if (block->size - size >= CACHE_QUANTUM) {
1067 cache_allocator_block *leftover =
1068 (cache_allocator_block *)(size + (uintptr_t)block);
1069 leftover->size = block->size - size;
1070 leftover->state = (uintptr_t)-1;
1071 leftover->nextFree = block->nextFree;
1072 *blockP = leftover;
1073 } else {
1074 *blockP = block->nextFree;
1075 }
1076
1077 // block is now exactly the right size.
1078
1079 bzero(block, size);
1080 block->size = mask; // Cache->mask
1081 block->state = 0; // Cache->occupied
1082
1083 return block;
1084 }
1085
1086 // No room in this region.
1087 return NULL;
1088 }
1089
1090
1091 /***********************************************************************
1092 * cache_allocator_calloc
1093 * Custom allocator for large method caches (128+ slots)
1094 * The returned cache block already has cache->mask set.
1095 * cache->occupied and the cache contents are zero.
1096 * Cache locks: cacheUpdateLock must be held by the caller
1097 **********************************************************************/
1098 static Cache cache_allocator_calloc(size_t size)
1099 {
1100 cache_allocator_region *rgn;
1101
1102 cacheUpdateLock.assertLocked();
1103
1104 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1105 void *p = cache_region_calloc(rgn, size);
1106 if (p) {
1107 return (Cache)p;
1108 }
1109 }
1110
1111 // No regions or all regions full - make a region and try one more time
1112 // In the unlikely case of a cache over 256KB, it will get its own region.
1113 return (Cache)cache_region_calloc(cache_allocator_add_region(size), size);
1114 }
1115
1116
1117 /***********************************************************************
1118 * cache_allocator_region_for_block
1119 * Returns the cache allocator region that ptr points into, or NULL.
1120 **********************************************************************/
1121 static cache_allocator_region *cache_allocator_region_for_block(cache_allocator_block *block)
1122 {
1123 cache_allocator_region *rgn;
1124 for (rgn = cacheRegion; rgn != NULL; rgn = rgn->next) {
1125 if (block >= rgn->start && block < rgn->end) return rgn;
1126 }
1127 return NULL;
1128 }
1129
1130
1131 /***********************************************************************
1132 * cache_allocator_is_block
1133 * If ptr is a live block from the cache allocator, return YES
1134 * If ptr is a block from some other allocator, return NO.
1135 * If ptr is a dead block from the cache allocator, result is undefined.
1136 * Cache locks: cacheUpdateLock must be held by the caller
1137 **********************************************************************/
1138 static bool cache_allocator_is_block(void *ptr)
1139 {
1140 cacheUpdateLock.assertLocked();
1141 return (cache_allocator_region_for_block((cache_allocator_block *)ptr) != NULL);
1142 }
1143
1144 /***********************************************************************
1145 * cache_allocator_free
1146 * Frees a block allocated by the cache allocator.
1147 * Cache locks: cacheUpdateLock must be held by the caller.
1148 **********************************************************************/
1149 static void cache_allocator_free(void *ptr)
1150 {
1151 cache_allocator_block *dead = (cache_allocator_block *)ptr;
1152 cache_allocator_block *cur;
1153 cache_allocator_region *rgn;
1154
1155 cacheUpdateLock.assertLocked();
1156
1157 if (! (rgn = cache_allocator_region_for_block(dead))) {
1158 // free of non-pointer
1159 _objc_inform("cache_allocator_free of non-pointer %p", dead);
1160 return;
1161 }
1162
1163 dead->size = cache_allocator_size_for_mask(dead->size);
1164 dead->state = (uintptr_t)-1;
1165
1166 if (!rgn->freeList || rgn->freeList > dead) {
1167 // dead block belongs at front of free list
1168 dead->nextFree = rgn->freeList;
1169 rgn->freeList = dead;
1170 cache_allocator_coalesce(dead);
1171 return;
1172 }
1173
1174 // dead block belongs in the middle or end of free list
1175 for (cur = rgn->freeList; cur != NULL; cur = cur->nextFree) {
1176 cache_allocator_block *ahead = cur->nextFree;
1177
1178 if (!ahead || ahead > dead) {
1179 // cur and ahead straddle dead, OR dead belongs at end of free list
1180 cur->nextFree = dead;
1181 dead->nextFree = ahead;
1182
1183 // coalesce into dead first in case both succeed
1184 cache_allocator_coalesce(dead);
1185 cache_allocator_coalesce(cur);
1186 return;
1187 }
1188 }
1189
1190 // uh-oh
1191 _objc_inform("cache_allocator_free of non-pointer %p", ptr);
1192 }
1193
1194 // defined(CACHE_ALLOCATOR)
1195 #endif
1196
1197 /***********************************************************************
1198 * Cache instrumentation and debugging
1199 **********************************************************************/
1200
1201 #ifdef OBJC_INSTRUMENTED
1202 enum {
1203 CACHE_HISTOGRAM_SIZE = 512
1204 };
1205
1206 unsigned int CacheHitHistogram [CACHE_HISTOGRAM_SIZE];
1207 unsigned int CacheMissHistogram [CACHE_HISTOGRAM_SIZE];
1208 #endif
1209
1210
1211 /***********************************************************************
1212 * _cache_print.
1213 **********************************************************************/
1214 static void _cache_print(Cache cache)
1215 {
1216 uintptr_t index;
1217 uintptr_t count;
1218
1219 count = cache->mask + 1;
1220 for (index = 0; index < count; index += 1) {
1221 cache_entry *entry = (cache_entry *)cache->buckets[index];
1222 if (entry) {
1223 if (entry->imp == _objc_msgForward_impcache)
1224 printf ("does not recognize: \n");
1225 printf ("%s\n", sel_getName(entry->name));
1226 }
1227 }
1228 }
1229
1230
1231 /***********************************************************************
1232 * _class_printMethodCaches.
1233 **********************************************************************/
1234 void _class_printMethodCaches(Class cls)
1235 {
1236 if (_cache_isEmpty(cls->cache)) {
1237 printf("no instance-method cache for class %s\n",cls->nameForLogging());
1238 } else {
1239 printf("instance-method cache for class %s:\n", cls->nameForLogging());
1240 _cache_print(cls->cache);
1241 }
1242
1243 if (_cache_isEmpty(cls->ISA()->cache)) {
1244 printf("no class-method cache for class %s\n", cls->nameForLogging());
1245 } else {
1246 printf ("class-method cache for class %s:\n", cls->nameForLogging());
1247 _cache_print(cls->ISA()->cache);
1248 }
1249 }
1250
1251
1252 #if 0
1253 #warning fixme
1254
1255
1256 /***********************************************************************
1257 * _class_printDuplicateCacheEntries.
1258 **********************************************************************/
1259 void _class_printDuplicateCacheEntries(bool detail)
1260 {
1261 NXHashState state;
1262 Class cls;
1263 unsigned int duplicates;
1264 unsigned int index1;
1265 unsigned int index2;
1266 unsigned int mask;
1267 unsigned int count;
1268 unsigned int isMeta;
1269 Cache cache;
1270
1271
1272 printf ("Checking for duplicate cache entries \n");
1273
1274 // Outermost loop - iterate over all classes
1275 state = NXInitHashState (class_hash);
1276 duplicates = 0;
1277 while (NXNextHashState (class_hash, &state, (void **) &cls))
1278 {
1279 // Control loop - do given class' cache, then its isa's cache
1280 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1281 {
1282 // Select cache of interest and make sure it exists
1283 cache = (isMeta ? cls->ISA : cls)->cache;
1284 if (_cache_isEmpty(cache))
1285 continue;
1286
1287 // Middle loop - check each entry in the given cache
1288 mask = cache->mask;
1289 count = mask + 1;
1290 for (index1 = 0; index1 < count; index1 += 1)
1291 {
1292 // Skip invalid entry
1293 if (!cache->buckets[index1])
1294 continue;
1295
1296 // Inner loop - check that given entry matches no later entry
1297 for (index2 = index1 + 1; index2 < count; index2 += 1)
1298 {
1299 // Skip invalid entry
1300 if (!cache->buckets[index2])
1301 continue;
1302
1303 // Check for duplication by method name comparison
1304 if (strcmp ((char *) cache->buckets[index1]->name),
1305 (char *) cache->buckets[index2]->name)) == 0)
1306 {
1307 if (detail)
1308 printf ("%s %s\n", cls->nameForLogging(), sel_getName(cache->buckets[index1]->name));
1309 duplicates += 1;
1310 break;
1311 }
1312 }
1313 }
1314 }
1315 }
1316
1317 // Log the findings
1318 printf ("duplicates = %d\n", duplicates);
1319 printf ("total cache fills = %d\n", totalCacheFills);
1320 }
1321
1322
1323 /***********************************************************************
1324 * PrintCacheHeader.
1325 **********************************************************************/
1326 static void PrintCacheHeader(void)
1327 {
1328 #ifdef OBJC_INSTRUMENTED
1329 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS TotalD AvgD MaxD TotalD AvgD MaxD TotD AvgD MaxD\n");
1330 printf ("Size Count Used Used Used Hit Hit Miss Miss Hits Prbs Prbs Misses Prbs Prbs Flsh Flsh Flsh\n");
1331 printf ("----- ----- ----- ----- ---- ---- ---- ---- ---- ------- ---- ---- ------- ---- ---- ---- ---- ----\n");
1332 #else
1333 printf ("Cache Cache Slots Avg Max AvgS MaxS AvgS MaxS\n");
1334 printf ("Size Count Used Used Used Hit Hit Miss Miss\n");
1335 printf ("----- ----- ----- ----- ---- ---- ---- ---- ----\n");
1336 #endif
1337 }
1338
1339
1340 /***********************************************************************
1341 * PrintCacheInfo.
1342 **********************************************************************/
1343 static void PrintCacheInfo(unsigned int cacheSize,
1344 unsigned int cacheCount,
1345 unsigned int slotsUsed,
1346 float avgUsed, unsigned int maxUsed,
1347 float avgSHit, unsigned int maxSHit,
1348 float avgSMiss, unsigned int maxSMiss
1349 #ifdef OBJC_INSTRUMENTED
1350 , unsigned int totDHits,
1351 float avgDHit,
1352 unsigned int maxDHit,
1353 unsigned int totDMisses,
1354 float avgDMiss,
1355 unsigned int maxDMiss,
1356 unsigned int totDFlsh,
1357 float avgDFlsh,
1358 unsigned int maxDFlsh
1359 #endif
1360 )
1361 {
1362 #ifdef OBJC_INSTRUMENTED
1363 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u %7u %4.1f %4u %7u %4.1f %4u %4u %4.1f %4u\n",
1364 #else
1365 printf ("%5u %5u %5u %5.1f %4u %4.1f %4u %4.1f %4u\n",
1366 #endif
1367 cacheSize, cacheCount, slotsUsed, avgUsed, maxUsed, avgSHit, maxSHit, avgSMiss, maxSMiss
1368 #ifdef OBJC_INSTRUMENTED
1369 , totDHits, avgDHit, maxDHit, totDMisses, avgDMiss, maxDMiss, totDFlsh, avgDFlsh, maxDFlsh
1370 #endif
1371 );
1372
1373 }
1374
1375
1376 #ifdef OBJC_INSTRUMENTED
1377 /***********************************************************************
1378 * PrintCacheHistogram. Show the non-zero entries from the specified
1379 * cache histogram.
1380 **********************************************************************/
1381 static void PrintCacheHistogram(char *title,
1382 unsigned int *firstEntry,
1383 unsigned int entryCount)
1384 {
1385 unsigned int index;
1386 unsigned int *thisEntry;
1387
1388 printf ("%s\n", title);
1389 printf (" Probes Tally\n");
1390 printf (" ------ -----\n");
1391 for (index = 0, thisEntry = firstEntry;
1392 index < entryCount;
1393 index += 1, thisEntry += 1)
1394 {
1395 if (*thisEntry == 0)
1396 continue;
1397
1398 printf (" %6d %5d\n", index, *thisEntry);
1399 }
1400 }
1401 #endif
1402
1403
1404 /***********************************************************************
1405 * _class_printMethodCacheStatistics.
1406 **********************************************************************/
1407
1408 #define MAX_LOG2_SIZE 32
1409 #define MAX_CHAIN_SIZE 100
1410
1411 void _class_printMethodCacheStatistics(void)
1412 {
1413 unsigned int isMeta;
1414 unsigned int index;
1415 NXHashState state;
1416 Class cls;
1417 unsigned int totalChain;
1418 unsigned int totalMissChain;
1419 unsigned int maxChain;
1420 unsigned int maxMissChain;
1421 unsigned int classCount;
1422 unsigned int negativeEntryCount;
1423 unsigned int cacheExpandCount;
1424 unsigned int cacheCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1425 unsigned int totalEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1426 unsigned int maxEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1427 unsigned int totalChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1428 unsigned int totalMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1429 unsigned int totalMaxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1430 unsigned int totalMaxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1431 unsigned int maxChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1432 unsigned int maxMissChainBySize[2][MAX_LOG2_SIZE] = {{0}};
1433 unsigned int chainCount[MAX_CHAIN_SIZE] = {0};
1434 unsigned int missChainCount[MAX_CHAIN_SIZE] = {0};
1435 #ifdef OBJC_INSTRUMENTED
1436 unsigned int hitCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1437 unsigned int hitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1438 unsigned int maxHitProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1439 unsigned int missCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1440 unsigned int missProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1441 unsigned int maxMissProbesBySize[2][MAX_LOG2_SIZE] = {{0}};
1442 unsigned int flushCountBySize[2][MAX_LOG2_SIZE] = {{0}};
1443 unsigned int flushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1444 unsigned int maxFlushedEntriesBySize[2][MAX_LOG2_SIZE] = {{0}};
1445 #endif
1446
1447 printf ("Printing cache statistics\n");
1448
1449 // Outermost loop - iterate over all classes
1450 state = NXInitHashState (class_hash);
1451 classCount = 0;
1452 negativeEntryCount = 0;
1453 cacheExpandCount = 0;
1454 while (NXNextHashState (class_hash, &state, (void **) &cls))
1455 {
1456 // Tally classes
1457 classCount += 1;
1458
1459 // Control loop - do given class' cache, then its isa's cache
1460 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1461 {
1462 Cache cache;
1463 unsigned int mask;
1464 unsigned int log2Size;
1465 unsigned int entryCount;
1466
1467 // Select cache of interest
1468 cache = (isMeta ? cls->ISA : cls)->cache;
1469
1470 // Ignore empty cache... should we?
1471 if (_cache_isEmpty(cache))
1472 continue;
1473
1474 // Middle loop - do each entry in the given cache
1475 mask = cache->mask;
1476 entryCount = 0;
1477 totalChain = 0;
1478 totalMissChain = 0;
1479 maxChain = 0;
1480 maxMissChain = 0;
1481 for (index = 0; index < mask + 1; index += 1)
1482 {
1483 cache_entry **buckets;
1484 cache_entry *entry;
1485 unsigned int hash;
1486 unsigned int methodChain;
1487 unsigned int methodMissChain;
1488 unsigned int index2;
1489
1490 // If entry is invalid, the only item of
1491 // interest is that future insert hashes
1492 // to this entry can use it directly.
1493 buckets = (cache_entry **)cache->buckets;
1494 if (!buckets[index])
1495 {
1496 missChainCount[0] += 1;
1497 continue;
1498 }
1499
1500 entry = buckets[index];
1501
1502 // Tally valid entries
1503 entryCount += 1;
1504
1505 // Tally "forward::" entries
1506 if (entry->imp == _objc_msgForward_impcache)
1507 negativeEntryCount += 1;
1508
1509 // Calculate search distance (chain length) for this method
1510 // The chain may wrap around to the beginning of the table.
1511 hash = CACHE_HASH(entry->name, mask);
1512 if (index >= hash) methodChain = index - hash;
1513 else methodChain = (mask+1) + index - hash;
1514
1515 // Tally chains of this length
1516 if (methodChain < MAX_CHAIN_SIZE)
1517 chainCount[methodChain] += 1;
1518
1519 // Keep sum of all chain lengths
1520 totalChain += methodChain;
1521
1522 // Record greatest chain length
1523 if (methodChain > maxChain)
1524 maxChain = methodChain;
1525
1526 // Calculate search distance for miss that hashes here
1527 index2 = index;
1528 while (buckets[index2])
1529 {
1530 index2 += 1;
1531 index2 &= mask;
1532 }
1533 methodMissChain = ((index2 - index) & mask);
1534
1535 // Tally miss chains of this length
1536 if (methodMissChain < MAX_CHAIN_SIZE)
1537 missChainCount[methodMissChain] += 1;
1538
1539 // Keep sum of all miss chain lengths in this class
1540 totalMissChain += methodMissChain;
1541
1542 // Record greatest miss chain length
1543 if (methodMissChain > maxMissChain)
1544 maxMissChain = methodMissChain;
1545 }
1546
1547 // Factor this cache into statistics about caches of the same
1548 // type and size (all caches are a power of two in size)
1549 log2Size = log2u (mask + 1);
1550 cacheCountBySize[isMeta][log2Size] += 1;
1551 totalEntriesBySize[isMeta][log2Size] += entryCount;
1552 if (entryCount > maxEntriesBySize[isMeta][log2Size])
1553 maxEntriesBySize[isMeta][log2Size] = entryCount;
1554 totalChainBySize[isMeta][log2Size] += totalChain;
1555 totalMissChainBySize[isMeta][log2Size] += totalMissChain;
1556 totalMaxChainBySize[isMeta][log2Size] += maxChain;
1557 totalMaxMissChainBySize[isMeta][log2Size] += maxMissChain;
1558 if (maxChain > maxChainBySize[isMeta][log2Size])
1559 maxChainBySize[isMeta][log2Size] = maxChain;
1560 if (maxMissChain > maxMissChainBySize[isMeta][log2Size])
1561 maxMissChainBySize[isMeta][log2Size] = maxMissChain;
1562 #ifdef OBJC_INSTRUMENTED
1563 {
1564 CacheInstrumentation *cacheData;
1565
1566 cacheData = CACHE_INSTRUMENTATION(cache);
1567 hitCountBySize[isMeta][log2Size] += cacheData->hitCount;
1568 hitProbesBySize[isMeta][log2Size] += cacheData->hitProbes;
1569 if (cacheData->maxHitProbes > maxHitProbesBySize[isMeta][log2Size])
1570 maxHitProbesBySize[isMeta][log2Size] = cacheData->maxHitProbes;
1571 missCountBySize[isMeta][log2Size] += cacheData->missCount;
1572 missProbesBySize[isMeta][log2Size] += cacheData->missProbes;
1573 if (cacheData->maxMissProbes > maxMissProbesBySize[isMeta][log2Size])
1574 maxMissProbesBySize[isMeta][log2Size] = cacheData->maxMissProbes;
1575 flushCountBySize[isMeta][log2Size] += cacheData->flushCount;
1576 flushedEntriesBySize[isMeta][log2Size] += cacheData->flushedEntries;
1577 if (cacheData->maxFlushedEntries > maxFlushedEntriesBySize[isMeta][log2Size])
1578 maxFlushedEntriesBySize[isMeta][log2Size] = cacheData->maxFlushedEntries;
1579 }
1580 #endif
1581 // Caches start with a power of two number of entries, and grow by doubling, so
1582 // we can calculate the number of times this cache has expanded
1583 cacheExpandCount += log2Size - INIT_CACHE_SIZE_LOG2;
1584 }
1585 }
1586
1587 {
1588 unsigned int cacheCountByType[2] = {0};
1589 unsigned int totalCacheCount = 0;
1590 unsigned int totalEntries = 0;
1591 unsigned int maxEntries = 0;
1592 unsigned int totalSlots = 0;
1593 #ifdef OBJC_INSTRUMENTED
1594 unsigned int totalHitCount = 0;
1595 unsigned int totalHitProbes = 0;
1596 unsigned int maxHitProbes = 0;
1597 unsigned int totalMissCount = 0;
1598 unsigned int totalMissProbes = 0;
1599 unsigned int maxMissProbes = 0;
1600 unsigned int totalFlushCount = 0;
1601 unsigned int totalFlushedEntries = 0;
1602 unsigned int maxFlushedEntries = 0;
1603 #endif
1604
1605 totalChain = 0;
1606 maxChain = 0;
1607 totalMissChain = 0;
1608 maxMissChain = 0;
1609
1610 // Sum information over all caches
1611 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1612 {
1613 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1614 {
1615 cacheCountByType[isMeta] += cacheCountBySize[isMeta][index];
1616 totalEntries += totalEntriesBySize[isMeta][index];
1617 totalSlots += cacheCountBySize[isMeta][index] * (1 << index);
1618 totalChain += totalChainBySize[isMeta][index];
1619 if (maxEntriesBySize[isMeta][index] > maxEntries)
1620 maxEntries = maxEntriesBySize[isMeta][index];
1621 if (maxChainBySize[isMeta][index] > maxChain)
1622 maxChain = maxChainBySize[isMeta][index];
1623 totalMissChain += totalMissChainBySize[isMeta][index];
1624 if (maxMissChainBySize[isMeta][index] > maxMissChain)
1625 maxMissChain = maxMissChainBySize[isMeta][index];
1626 #ifdef OBJC_INSTRUMENTED
1627 totalHitCount += hitCountBySize[isMeta][index];
1628 totalHitProbes += hitProbesBySize[isMeta][index];
1629 if (maxHitProbesBySize[isMeta][index] > maxHitProbes)
1630 maxHitProbes = maxHitProbesBySize[isMeta][index];
1631 totalMissCount += missCountBySize[isMeta][index];
1632 totalMissProbes += missProbesBySize[isMeta][index];
1633 if (maxMissProbesBySize[isMeta][index] > maxMissProbes)
1634 maxMissProbes = maxMissProbesBySize[isMeta][index];
1635 totalFlushCount += flushCountBySize[isMeta][index];
1636 totalFlushedEntries += flushedEntriesBySize[isMeta][index];
1637 if (maxFlushedEntriesBySize[isMeta][index] > maxFlushedEntries)
1638 maxFlushedEntries = maxFlushedEntriesBySize[isMeta][index];
1639 #endif
1640 }
1641
1642 totalCacheCount += cacheCountByType[isMeta];
1643 }
1644
1645 // Log our findings
1646 printf ("There are %u classes\n", classCount);
1647
1648 for (isMeta = 0; isMeta <= 1; isMeta += 1)
1649 {
1650 // Number of this type of class
1651 printf ("\nThere are %u %s-method caches, broken down by size (slot count):\n",
1652 cacheCountByType[isMeta],
1653 isMeta ? "class" : "instance");
1654
1655 // Print header
1656 PrintCacheHeader ();
1657
1658 // Keep format consistent even if there are caches of this kind
1659 if (cacheCountByType[isMeta] == 0)
1660 {
1661 printf ("(none)\n");
1662 continue;
1663 }
1664
1665 // Usage information by cache size
1666 for (index = 0; index < MAX_LOG2_SIZE; index += 1)
1667 {
1668 unsigned int cacheCount;
1669 unsigned int cacheSlotCount;
1670 unsigned int cacheEntryCount;
1671
1672 // Get number of caches of this type and size
1673 cacheCount = cacheCountBySize[isMeta][index];
1674 if (cacheCount == 0)
1675 continue;
1676
1677 // Get the cache slot count and the total number of valid entries
1678 cacheSlotCount = (1 << index);
1679 cacheEntryCount = totalEntriesBySize[isMeta][index];
1680
1681 // Give the analysis
1682 PrintCacheInfo (cacheSlotCount,
1683 cacheCount,
1684 cacheEntryCount,
1685 (float) cacheEntryCount / (float) cacheCount,
1686 maxEntriesBySize[isMeta][index],
1687 (float) totalChainBySize[isMeta][index] / (float) cacheEntryCount,
1688 maxChainBySize[isMeta][index],
1689 (float) totalMissChainBySize[isMeta][index] / (float) (cacheCount * cacheSlotCount),
1690 maxMissChainBySize[isMeta][index]
1691 #ifdef OBJC_INSTRUMENTED
1692 , hitCountBySize[isMeta][index],
1693 hitCountBySize[isMeta][index] ?
1694 (float) hitProbesBySize[isMeta][index] / (float) hitCountBySize[isMeta][index] : 0.0,
1695 maxHitProbesBySize[isMeta][index],
1696 missCountBySize[isMeta][index],
1697 missCountBySize[isMeta][index] ?
1698 (float) missProbesBySize[isMeta][index] / (float) missCountBySize[isMeta][index] : 0.0,
1699 maxMissProbesBySize[isMeta][index],
1700 flushCountBySize[isMeta][index],
1701 flushCountBySize[isMeta][index] ?
1702 (float) flushedEntriesBySize[isMeta][index] / (float) flushCountBySize[isMeta][index] : 0.0,
1703 maxFlushedEntriesBySize[isMeta][index]
1704 #endif
1705 );
1706 }
1707 }
1708
1709 // Give overall numbers
1710 printf ("\nCumulative:\n");
1711 PrintCacheHeader ();
1712 PrintCacheInfo (totalSlots,
1713 totalCacheCount,
1714 totalEntries,
1715 (float) totalEntries / (float) totalCacheCount,
1716 maxEntries,
1717 (float) totalChain / (float) totalEntries,
1718 maxChain,
1719 (float) totalMissChain / (float) totalSlots,
1720 maxMissChain
1721 #ifdef OBJC_INSTRUMENTED
1722 , totalHitCount,
1723 totalHitCount ?
1724 (float) totalHitProbes / (float) totalHitCount : 0.0,
1725 maxHitProbes,
1726 totalMissCount,
1727 totalMissCount ?
1728 (float) totalMissProbes / (float) totalMissCount : 0.0,
1729 maxMissProbes,
1730 totalFlushCount,
1731 totalFlushCount ?
1732 (float) totalFlushedEntries / (float) totalFlushCount : 0.0,
1733 maxFlushedEntries
1734 #endif
1735 );
1736
1737 printf ("\nNumber of \"forward::\" entries: %d\n", negativeEntryCount);
1738 printf ("Number of cache expansions: %d\n", cacheExpandCount);
1739 #ifdef OBJC_INSTRUMENTED
1740 printf ("flush_caches: total calls total visits average visits max visits total classes visits/class\n");
1741 printf (" ----------- ------------ -------------- ---------- ------------- -------------\n");
1742 printf (" linear %11u %12u %14.1f %10u %13u %12.2f\n",
1743 LinearFlushCachesCount,
1744 LinearFlushCachesVisitedCount,
1745 LinearFlushCachesCount ?
1746 (float) LinearFlushCachesVisitedCount / (float) LinearFlushCachesCount : 0.0,
1747 MaxLinearFlushCachesVisitedCount,
1748 LinearFlushCachesVisitedCount,
1749 1.0);
1750 printf (" nonlinear %11u %12u %14.1f %10u %13u %12.2f\n",
1751 NonlinearFlushCachesCount,
1752 NonlinearFlushCachesVisitedCount,
1753 NonlinearFlushCachesCount ?
1754 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesCount : 0.0,
1755 MaxNonlinearFlushCachesVisitedCount,
1756 NonlinearFlushCachesClassCount,
1757 NonlinearFlushCachesClassCount ?
1758 (float) NonlinearFlushCachesVisitedCount / (float) NonlinearFlushCachesClassCount : 0.0);
1759 printf (" ideal %11u %12u %14.1f %10u %13u %12.2f\n",
1760 LinearFlushCachesCount + NonlinearFlushCachesCount,
1761 IdealFlushCachesCount,
1762 LinearFlushCachesCount + NonlinearFlushCachesCount ?
1763 (float) IdealFlushCachesCount / (float) (LinearFlushCachesCount + NonlinearFlushCachesCount) : 0.0,
1764 MaxIdealFlushCachesCount,
1765 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount,
1766 LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount ?
1767 (float) IdealFlushCachesCount / (float) (LinearFlushCachesVisitedCount + NonlinearFlushCachesClassCount) : 0.0);
1768
1769 PrintCacheHistogram ("\nCache hit histogram:", &CacheHitHistogram[0], CACHE_HISTOGRAM_SIZE);
1770 PrintCacheHistogram ("\nCache miss histogram:", &CacheMissHistogram[0], CACHE_HISTOGRAM_SIZE);
1771 #endif
1772
1773 #if 0
1774 printf ("\nLookup chains:");
1775 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1776 {
1777 if (chainCount[index] != 0)
1778 printf (" %u:%u", index, chainCount[index]);
1779 }
1780
1781 printf ("\nMiss chains:");
1782 for (index = 0; index < MAX_CHAIN_SIZE; index += 1)
1783 {
1784 if (missChainCount[index] != 0)
1785 printf (" %u:%u", index, missChainCount[index]);
1786 }
1787
1788 printf ("\nTotal memory usage for cache data structures: %lu bytes\n",
1789 totalCacheCount * (sizeof(struct objc_cache) - sizeof(cache_entry *)) +
1790 totalSlots * sizeof(cache_entry *) +
1791 negativeEntryCount * sizeof(cache_entry));
1792 #endif
1793 }
1794 }
1795
1796 #endif
1797
1798 // !__OBJC2__
1799 #endif