X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..cc8bc92ae4a8e9f1a1ab61bf83d34ad8150b3405:/osfmk/kern/kalloc.c?ds=sidebyside diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 5a799ee77..97b04c739 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -77,6 +77,9 @@ #include #include #include +#include + +#include #ifdef MACH_BSD zone_t kalloc_zone(vm_size_t); @@ -89,6 +92,9 @@ vm_size_t kalloc_max; vm_size_t kalloc_max_prerounded; vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */ +/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */ +unsigned long kalloc_fallback_count; + unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; vm_size_t kalloc_large_max; @@ -111,28 +117,14 @@ static void KALLOC_ZINFO_SALLOC(vm_size_t bytes) { thread_t thr = current_thread(); - task_t task; - zinfo_usage_t zinfo; - ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes); - - if (kalloc_fake_zone_index != -1 && - (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) - zinfo[kalloc_fake_zone_index].alloc += bytes; } static void KALLOC_ZINFO_SFREE(vm_size_t bytes) { thread_t thr = current_thread(); - task_t task; - zinfo_usage_t zinfo; - ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes); - - if (kalloc_fake_zone_index != -1 && - (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) - zinfo[kalloc_fake_zone_index].free += bytes; } /* @@ -154,45 +146,42 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes) #if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4 -/* - * "Legacy" aka "power-of-2" backing zones with 16-byte minimum - * size and alignment. Users of this profile would probably - * benefit from some tuning. - */ - #define K_ZONE_SIZES \ 16, \ 32, \ -/* 6 */ 64, \ - 128, \ + 48, \ +/* 3 */ 64, \ + 80, \ + 96, \ +/* 6 */ 128, \ + 160, 192, \ 256, \ -/* 9 */ 512, \ - 1024, \ +/* 9 */ 288, \ + 512, 576, \ + 1024, 1152, \ +/* C */ 1280, \ 2048, \ -/* C */ 4096 - + 4096 #define K_ZONE_NAMES \ "kalloc.16", \ "kalloc.32", \ -/* 6 */ "kalloc.64", \ - "kalloc.128", \ + "kalloc.48", \ +/* 3 */ "kalloc.64", \ + "kalloc.80", \ + "kalloc.96", \ +/* 6 */ "kalloc.128", \ + "kalloc.160", \ + "kalloc.192", \ "kalloc.256", \ -/* 9 */ "kalloc.512", \ +/* 9 */ "kalloc.288", \ + "kalloc.512", \ + "kalloc.576", \ "kalloc.1024", \ + "kalloc.1152", \ +/* C */ "kalloc.1280", \ "kalloc.2048", \ -/* C */ "kalloc.4096" - -#define K_ZONE_MAXIMA \ - 1024, \ - 4096, \ -/* 6 */ 4096, \ - 4096, \ - 4096, \ -/* 9 */ 1024, \ - 1024, \ - 1024, \ -/* C */ 1024 + "kalloc.4096" #elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3 @@ -204,52 +193,49 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes) /* 3 */ 8, \ 16, 24, \ 32, 40, 48, \ -/* 6 */ 64, 88, 112, \ +/* 6 */ 64, 72, 88, 112, \ 128, 192, \ - 256, 384, \ -/* 9 */ 512, 768, \ - 1024, 1536, \ - 2048, 3072, \ + 256, 288, 384, 440, \ +/* 9 */ 512, 576, 768, \ + 1024, 1152, 1536, \ + 2048, 2128, 3072, \ 4096, 6144 #define K_ZONE_NAMES \ /* 3 */ "kalloc.8", \ "kalloc.16", "kalloc.24", \ "kalloc.32", "kalloc.40", "kalloc.48", \ -/* 6 */ "kalloc.64", "kalloc.88", "kalloc.112", \ +/* 6 */ "kalloc.64", "kalloc.72", "kalloc.88", "kalloc.112", \ "kalloc.128", "kalloc.192", \ - "kalloc.256", "kalloc.384", \ -/* 9 */ "kalloc.512", "kalloc.768", \ - "kalloc.1024", "kalloc.1536", \ - "kalloc.2048", "kalloc.3072", \ + "kalloc.256", "kalloc.288", "kalloc.384", "kalloc.440", \ +/* 9 */ "kalloc.512", "kalloc.576", "kalloc.768", \ + "kalloc.1024", "kalloc.1152", "kalloc.1536", \ + "kalloc.2048", "kalloc.2128", "kalloc.3072", \ "kalloc.4096", "kalloc.6144" -#define K_ZONE_MAXIMA \ -/* 3 */ 1024, \ - 1024, 1024, \ - 4096, 4096, 4096, \ -/* 6 */ 4096, 4096, 4096, \ - 4096, 4096, \ - 4096, 4096, \ -/* 9 */ 1024, 1024, \ - 1024, 1024, \ - 1024, 1024, \ -/* C */ 1024, 64 - #else #error missing zone size parameters for kalloc #endif #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN) +#define KiB(x) (1024 * (x)) static const int k_zone_size[] = { K_ZONE_SIZES, - 8192, - 16384, -/* F */ 32768 + KiB(8), + KiB(16), + KiB(32) +}; + +#define MAX_K_ZONE (sizeof (k_zone_size) / sizeof (k_zone_size[0])) + +static const char *k_zone_name[MAX_K_ZONE] = { + K_ZONE_NAMES, + "kalloc.8192", + "kalloc.16384", + "kalloc.32768" }; -#define N_K_ZONE (sizeof (k_zone_size) / sizeof (k_zone_size[0])) /* * Many kalloc() allocations are for small structures containing a few @@ -271,37 +257,13 @@ static int8_t k_zone_dlut[N_K_ZDLUT]; /* table of indices into k_zone[] */ */ static int k_zindex_start; -static zone_t k_zone[N_K_ZONE]; - -static const char *k_zone_name[N_K_ZONE] = { - K_ZONE_NAMES, - "kalloc.8192", - "kalloc.16384", -/* F */ "kalloc.32768" -}; - -/* - * Max number of elements per zone. zinit rounds things up correctly - * Doing things this way permits each zone to have a different maximum size - * based on need, rather than just guessing; it also - * means its patchable in case you're wrong! - */ -unsigned int k_zone_max[N_K_ZONE] = { - K_ZONE_MAXIMA, - 4096, - 64, -/* F */ 64 -}; +static zone_t k_zone[MAX_K_ZONE]; /* #define KALLOC_DEBUG 1 */ /* forward declarations */ -void * kalloc_canblock( - vm_size_t size, - boolean_t canblock); - -lck_grp_t *kalloc_lck_grp; +lck_grp_t kalloc_lck_grp; lck_mtx_t kalloc_lock; #define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock) @@ -339,7 +301,8 @@ kalloc_init( kern_return_t retval; vm_offset_t min; vm_size_t size, kalloc_map_size; - register int i; + int i; + vm_map_kernel_flags_t vmk_flags; /* * Scale the kalloc_map_size to physical memory size: stay below @@ -353,8 +316,14 @@ kalloc_init( if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) kalloc_map_size = KALLOC_MAP_SIZE_MIN; + vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + vmk_flags.vmkf_permanent = TRUE; + retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT, + FALSE, + (VM_FLAGS_ANYWHERE), + vmk_flags, + VM_KERN_MEMORY_KALLOC, &kalloc_map); if (retval != KERN_SUCCESS) @@ -364,30 +333,34 @@ kalloc_init( kalloc_map_max = min + kalloc_map_size - 1; /* - * Ensure that zones up to size 8192 bytes exist. - * This is desirable because messages are allocated - * with kalloc, and messages up through size 8192 are common. + * Create zones up to a least 2 pages because small page-multiples are common + * allocations. Also ensure that zones up to size 8192 bytes exist. This is + * desirable because messages are allocated with kalloc(), and messages up + * through size 8192 are common. */ + kalloc_max = PAGE_SIZE << 2; + if (kalloc_max < KiB(16)) { + kalloc_max = KiB(16); + } + assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */ - if (PAGE_SIZE < 16*1024) - kalloc_max = 16*1024; - else - kalloc_max = PAGE_SIZE; kalloc_max_prerounded = kalloc_max / 2 + 1; - /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */ + /* allocations larger than 16 times kalloc_max go directly to kernel map */ kalloc_kernmap_size = (kalloc_max * 16) + 1; kalloc_largest_allocated = kalloc_kernmap_size; /* - * Allocate a zone for each size we are going to handle. - * We specify non-paged memory. Don't charge the caller - * for the allocation, as we aren't sure how the memory - * will be handled. + * Allocate a zone for each size we are going to handle. Don't charge the + * caller for the allocation, as we aren't sure how the memory will be + * handled. */ - for (i = 0; (size = k_zone_size[i]) < kalloc_max; i++) { - k_zone[i] = zinit(size, k_zone_max[i] * size, size, - k_zone_name[i]); + for (i = 0; i < (int)MAX_K_ZONE && (size = k_zone_size[i]) < kalloc_max; i++) { + k_zone[i] = zinit(size, size, size, k_zone_name[i]); zone_change(k_zone[i], Z_CALLERACCT, FALSE); +#if VM_MAX_TAG_ZONES + if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE); +#endif + zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE); } /* @@ -415,7 +388,7 @@ kalloc_init( * Useful when debugging/tweaking the array of zone sizes. * Cache misses probably more critical than compare-branches! */ - for (i = 0; i < (int)N_K_ZONE; i++) { + for (i = 0; i < (int)MAX_K_ZONE; i++) { vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1; int compare = 0; int zindex; @@ -445,12 +418,13 @@ kalloc_init( compare == 1 ? "" : "s"); } #endif - kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL); - lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL); + + lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL); + lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL); OSMalloc_init(); -#ifdef MUTEX_ZONE +#ifdef MUTEX_ZONE lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx"); -#endif +#endif } /* @@ -475,18 +449,172 @@ get_zone_search(vm_size_t size, int zindex) while ((vm_size_t)k_zone_size[zindex] < size) zindex++; - assert((unsigned)zindex < N_K_ZONE && + assert((unsigned)zindex < MAX_K_ZONE && (vm_size_t)k_zone_size[zindex] < kalloc_max); return (k_zone[zindex]); } +static vm_size_t +vm_map_lookup_kalloc_entry_locked( + vm_map_t map, + void *addr) +{ + boolean_t ret; + vm_map_entry_t vm_entry = NULL; + + ret = vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry); + if (!ret) { + panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n", + map, addr); + } + if (vm_entry->vme_start != (vm_map_offset_t)addr) { + panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + if (!vm_entry->vme_atomic) { + panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n", + map, addr, vm_entry); + } + return (vm_entry->vme_end - vm_entry->vme_start); +} + +#if KASAN_KALLOC +/* + * KASAN kalloc stashes the original user-requested size away in the poisoned + * area. Return that directly. + */ +vm_size_t +kalloc_size(void *addr) +{ + (void)vm_map_lookup_kalloc_entry_locked; /* silence warning */ + return kasan_user_size((vm_offset_t)addr); +} +#else +vm_size_t +kalloc_size( + void *addr) +{ + vm_map_t map; + vm_size_t size; + + size = zone_element_size(addr, NULL); + if (size) { + return size; + } + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + vm_map_lock_read(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + vm_map_unlock_read(map); + return size; +} +#endif + +vm_size_t +kalloc_bucket_size( + vm_size_t size) +{ + zone_t z; + vm_map_t map; + + if (size < MAX_SIZE_ZDLUT) { + z = get_zone_dlut(size); + return z->elem_size; + } + + if (size < kalloc_max_prerounded) { + z = get_zone_search(size, k_zindex_start); + return z->elem_size; + } + + if (size >= kalloc_kernmap_size) + map = kernel_map; + else + map = kalloc_map; + + return vm_map_round_page(size, VM_MAP_PAGE_MASK(map)); +} + +#if KASAN_KALLOC +vm_size_t +kfree_addr(void *addr) +{ + vm_size_t origsz = kalloc_size(addr); + kfree(addr, origsz); + return origsz; +} +#else +vm_size_t +kfree_addr( + void *addr) +{ + vm_map_t map; + vm_size_t size = 0; + kern_return_t ret; + zone_t z; + + size = zone_element_size(addr, &z); + if (size) { + zfree(z, addr); + return size; + } + + if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) { + map = kalloc_map; + } else { + map = kernel_map; + } + if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) { + panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr); + } + + vm_map_lock(map); + size = vm_map_lookup_kalloc_entry_locked(map, addr); + ret = vm_map_remove_locked(map, + vm_map_trunc_page((vm_map_offset_t)addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((vm_map_offset_t)addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); + if (ret != KERN_SUCCESS) { + panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n", + addr, map, ret); + } + vm_map_unlock(map); + + kalloc_spin_lock(); + kalloc_large_total -= size; + kalloc_large_inuse--; + kalloc_unlock(); + + KALLOC_ZINFO_SFREE(size); + return size; +} +#endif + void * kalloc_canblock( - vm_size_t size, - boolean_t canblock) + vm_size_t * psize, + boolean_t canblock, + vm_allocation_site_t * site) { zone_t z; + vm_size_t size; + void *addr; + vm_tag_t tag; + + tag = VM_KERN_MEMORY_KALLOC; + size = *psize; + +#if KASAN_KALLOC + /* expand the allocation to accomodate redzones */ + vm_size_t req_size = size; + size = kasan_alloc_resize(req_size); +#endif if (size < MAX_SIZE_ZDLUT) z = get_zone_dlut(size); @@ -499,21 +627,31 @@ kalloc_canblock( * krealloc can use kmem_realloc.) */ vm_map_t alloc_map; - void *addr; /* kmem_alloc could block so we return if noblock */ if (!canblock) { return(NULL); } +#if KASAN_KALLOC + /* large allocation - use guard pages instead of small redzones */ + size = round_page(req_size + 2 * PAGE_SIZE); + assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded); +#endif + if (size >= kalloc_kernmap_size) alloc_map = kernel_map; else alloc_map = kalloc_map; - if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) { + if (site) tag = vm_tag_alloc(site); + + if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) { if (alloc_map != kernel_map) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) + if (kalloc_fallback_count++ == 0) { + printf("%s: falling back to kernel_map\n", __func__); + } + if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) addr = NULL; } else @@ -540,6 +678,12 @@ kalloc_canblock( KALLOC_ZINFO_SALLOC(size); } +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE); +#else + *psize = round_page(size); +#endif return(addr); } #ifdef KALLOC_DEBUG @@ -547,22 +691,40 @@ kalloc_canblock( panic("%s: z %p (%s) but requested size %lu", __func__, z, z->zone_name, (unsigned long)size); #endif + assert(size <= z->elem_size); - return (zalloc_canblock(z, canblock)); -} -void * -kalloc( - vm_size_t size) -{ - return( kalloc_canblock(size, TRUE) ); +#if VM_MAX_TAG_ZONES + if (z->tags && site) + { + tag = vm_tag_alloc(site); + if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC; + } +#endif + + addr = zalloc_canblock_tag(z, canblock, size, tag); + +#if KASAN_KALLOC + /* fixup the return address to skip the redzone */ + addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE); + + /* For KASan, the redzone lives in any additional space, so don't + * expand the allocation. */ +#else + *psize = z->elem_size; +#endif + + return addr; } void * -kalloc_noblock( - vm_size_t size) +kalloc_external( + vm_size_t size); +void * +kalloc_external( + vm_size_t size) { - return( kalloc_canblock(size, FALSE) ); + return( kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC) ); } volatile SInt32 kfree_nop_count = 0; @@ -574,6 +736,20 @@ kfree( { zone_t z; +#if KASAN_KALLOC + /* + * Resize back to the real allocation size and hand off to the KASan + * quarantine. `data` may then point to a different allocation. + */ + vm_size_t user_size = size; + kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC); + data = (void *)kasan_dealloc((vm_address_t)data, &size); + kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true); + if (!data) { + return; + } +#endif + if (size < MAX_SIZE_ZDLUT) z = get_zone_dlut(size); else if (size < kalloc_max_prerounded) @@ -610,7 +786,6 @@ kfree( return; } kmem_free(alloc_map, (vm_offset_t)data, size); - kalloc_spin_lock(); kalloc_large_total -= size; @@ -645,35 +820,6 @@ kalloc_zone( } #endif -void -kalloc_fake_zone_init(int zone_index) -{ - kalloc_fake_zone_index = zone_index; -} - -void -kalloc_fake_zone_info(int *count, - vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, - uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct) -{ - *count = kalloc_large_inuse; - *cur_size = kalloc_large_total; - *max_size = kalloc_large_max; - - if (kalloc_large_inuse) { - *elem_size = kalloc_large_total / kalloc_large_inuse; - *alloc_size = kalloc_large_total / kalloc_large_inuse; - } else { - *elem_size = 0; - *alloc_size = 0; - } - *sum_size = kalloc_large_sum; - *collectable = 0; - *exhaustable = 0; - *caller_acct = 0; -} - - void OSMalloc_init( void) @@ -700,7 +846,7 @@ OSMalloc_Tagalloc( OSMTag->OSMT_refcnt = 1; - strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); + strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); OSMalloc_tag_spin_lock(); enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag); @@ -763,11 +909,10 @@ OSMalloc( OSMalloc_Tagref(tag); if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) { - - if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) + if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) addr = NULL; } else - addr = kalloc((vm_size_t)size); + addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (!addr) OSMalloc_Tagrele(tag); @@ -787,7 +932,7 @@ OSMalloc_nowait( OSMalloc_Tagref(tag); /* XXX: use non-blocking kalloc for now */ - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -805,7 +950,7 @@ OSMalloc_noblock( return(NULL); OSMalloc_Tagref(tag); - addr = kalloc_noblock((vm_size_t)size); + addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC); if (addr == NULL) OSMalloc_Tagrele(tag); @@ -826,3 +971,11 @@ OSFree( OSMalloc_Tagrele(tag); } + +uint32_t +OSMalloc_size( + void *addr) +{ + return (uint32_t)kalloc_size(addr); +} +