X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/9bccf70c0258c7cac2dcb80011b2a964d884c552..c0fea4742e91338fffdcf79f86a7c1d5e2b97eb1:/osfmk/kern/zalloc.c

diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c
index 66e36c016..0661d36a3 100644
--- a/osfmk/kern/zalloc.c
+++ b/osfmk/kern/zalloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -59,8 +59,17 @@
 #include 
 #include 
 #include 
-#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -68,10 +77,20 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+
+#include 
+#include 
 #include 
+#include 
+
 #include 
+#if defined(__ppc__)
+/* for fake zone stat routines */
+#include 
+#include 
+#endif
 
 #if MACH_ASSERT
 /* Detect use of zone elt after freeing it by two methods:
@@ -83,12 +102,12 @@
 #if defined(__alpha)
 
 #define is_kernel_data_addr(a)                                          \
-        (!(a) || IS_SYS_VA(a) && !((a) & (sizeof(long)-1)))
+        (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
 
 #else /* !defined(__alpha) */
 
 #define is_kernel_data_addr(a)                                          \
-        (!(a) || (a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3))
+        (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
 
 #endif /* defined(__alpha) */
 
@@ -101,7 +120,7 @@ boolean_t zfree_clear = FALSE;
 #define ADD_TO_ZONE(zone, element)                                      \
 MACRO_BEGIN                                                             \
         if (zfree_clear)                                                \
-        {   int i;                                                      \
+        {   unsigned int i;                                             \
             for (i=1;                                                   \
                  i < zone->elem_size/sizeof(vm_offset_t) - 1;           \
                  i++)                                                   \
@@ -145,6 +164,8 @@ MACRO_END
 
 #if ZONE_DEBUG
 #define zone_debug_enabled(z) z->active_zones.next
+#define ROUNDUP(x,y)            ((((x)+(y)-1)/(y))*(y))
+#define ZONE_DEBUG_OFFSET       ROUNDUP(sizeof(queue_chain_t),16)
 #endif  /* ZONE_DEBUG */
 
 /*
@@ -152,19 +173,11 @@ MACRO_END
  */
 
 struct zone_page_table_entry {
-        struct zone_page_table_entry    *next;
-        short   in_free_list;
+        struct zone_page_table_entry    *link;
         short   alloc_count;
+        short   collect_count;
 };
 
-extern struct zone_page_table_entry * zone_page_table;
-
-#define lock_zone_page_table()          simple_lock(&zone_page_table_lock)
-#define unlock_zone_page_table()        simple_unlock(&zone_page_table_lock)
-
-#define zone_page(addr) \
-    (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))]))
-
 /* Forwards */
 void            zone_page_init(
                                 vm_offset_t     addr,
@@ -175,19 +188,12 @@ void            zone_page_alloc(
                                 vm_offset_t     addr,
                                 vm_size_t       size);
 
-void            zone_add_free_page_list(
-                                struct zone_page_table_entry    **free_list,
-                                vm_offset_t     addr,
-                                vm_size_t       size);
-
-void            zone_page_dealloc(
+void            zone_page_free_element(
+                                struct zone_page_table_entry    **free_pages,
                                 vm_offset_t     addr,
                                 vm_size_t       size);
 
-void            zone_page_in_use(
-                                vm_offset_t     addr,
-                                vm_size_t       size);
-
-void            zone_page_free(
+void            zone_page_collect(
                                 vm_offset_t     addr,
                                 vm_size_t       size);
 
@@ -224,26 +230,26 @@ vm_size_t       zdata_size;
 
 #define lock_zone(zone)                                 \
 MACRO_BEGIN                                             \
-        simple_lock(&(zone)->lock);                     \
+        mutex_lock(&(zone)->lock);                      \
 MACRO_END
 
 #define unlock_zone(zone)                               \
 MACRO_BEGIN                                             \
-        simple_unlock(&(zone)->lock);                   \
+        mutex_unlock(&(zone)->lock);                    \
 MACRO_END
 
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                                \
-        thread_sleep_simple_lock((event_t)(zone),       \
+        thread_sleep_mutex((event_t)(zone),             \
                                 &(zone)->lock,          \
                                 THREAD_UNINT)
 
 #define lock_zone_init(zone)                            \
 MACRO_BEGIN                                             \
-        simple_lock_init(&zone->lock, ETAP_MISC_ZONE);  \
+        mutex_init(&zone->lock, 0);                     \
 MACRO_END
 
-#define lock_try_zone(zone)     simple_lock_try(&zone->lock)
+#define lock_try_zone(zone)     mutex_try(&zone->lock)
 
 kern_return_t           zget_space(
                                 vm_offset_t     size,
@@ -257,20 +263,19 @@ vm_size_t       zalloc_wasted_space;
 /*
  *      Garbage collection map information
  */
-decl_simple_lock_data(, zone_page_table_lock)
 struct zone_page_table_entry *  zone_page_table;
 vm_offset_t                     zone_map_min_address;
 vm_offset_t                     zone_map_max_address;
-integer_t                       zone_pages;
+unsigned int                    zone_pages;
 
 /*
  *      Exclude more than one concurrent garbage collection
 */
 decl_mutex_data(,               zone_gc_lock)
 
-#define from_zone_map(addr) \
+#define from_zone_map(addr, size) \
         ((vm_offset_t)(addr) >= zone_map_min_address && \
-         (vm_offset_t)(addr) <  zone_map_max_address)
+         ((vm_offset_t)(addr) + size -1) <  zone_map_max_address)
 
 #define ZONE_PAGE_USED  0
 #define ZONE_PAGE_UNUSED -1
@@ -283,7 +288,7 @@ decl_mutex_data(,               zone_gc_lock)
 decl_simple_lock_data(, all_zones_lock)
 zone_t                  first_zone;
 zone_t                  *last_zone;
-int                     num_zones;
+unsigned int            num_zones;
 
 boolean_t zone_gc_allowed = TRUE;
 boolean_t zone_gc_forced = FALSE;
@@ -301,7 +306,7 @@ zinit(
         vm_size_t       size,           /* the size of an element */
         vm_size_t       max,            /* maximum memory to use */
         vm_size_t       alloc,          /* allocation size */
-        char            *name)          /* a name for the zone */
+        const char      *name)          /* a name for the zone */
 {
         zone_t          z;
 
@@ -326,15 +331,26 @@ zinit(
         alloc = round_page(alloc);
         max   = round_page(max);
         /*
-         * We look for an allocation size with least fragmentation
-         * in the range of 1 - 5 pages.  This size will be used unless
+         * we look for an allocation size with less than 1% waste
+         * up to 5 pages in size...
+         * otherwise, we look for an allocation size with least fragmentation
+         * in the range of 1 - 5 pages
+         * This size will be used unless
          * the user suggestion is larger AND has less fragmentation
          */
         {       vm_size_t best, waste; unsigned int i;
                 best  = PAGE_SIZE;
                 waste = best % size;
-                for (i = 2; i <= 5; i++){       vm_size_t tsize, twaste;
-                        tsize  = i * PAGE_SIZE;
+
+                for (i = 1; i <= 5; i++) {
+                        vm_size_t tsize, twaste;
+
+                        tsize = i * PAGE_SIZE;
+
+                        if ((tsize % size) < (tsize / 100)) {
+                                alloc = tsize;
+                                goto use_this_allocation;
+                        }
                         twaste = tsize % size;
                         if (twaste < waste)
                                 best = tsize, waste = twaste;
@@ -342,6 +358,7 @@ zinit(
                 if (alloc <= best || (alloc % size >= waste))
                         alloc = best;
         }
+use_this_allocation:
         if (max && (max < alloc))
                 max = alloc;
 
@@ -353,6 +370,7 @@ zinit(
         z->zone_name = name;
         z->count = 0;
         z->doing_alloc = FALSE;
+        z->doing_gc = FALSE;
         z->exhaustible = FALSE;
         z->collectable = TRUE;
         z->allows_foreign = FALSE;
@@ -387,22 +405,23 @@ zinit(
 void
 zcram(
         register zone_t         zone,
-        vm_offset_t             newmem,
+        void                    *newaddr,
         vm_size_t               size)
 {
         register vm_size_t      elem_size;
+        vm_offset_t             newmem = (vm_offset_t) newaddr;
 
         /* Basic sanity checks */
         assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
         assert(!zone->collectable || zone->allows_foreign
-                || (from_zone_map(newmem) && from_zone_map(newmem+size-1)));
+                || (from_zone_map(newmem, size)));
 
         elem_size = zone->elem_size;
 
         lock_zone(zone);
         while (size >= elem_size) {
                 ADD_TO_ZONE(zone, newmem);
-                if (from_zone_map(newmem))
+                if (from_zone_map(newmem, elem_size))
                         zone_page_alloc(newmem, elem_size);
                 zone->count++;  /* compensate for ADD_TO_ZONE */
                 size -= elem_size;
@@ -423,7 +442,7 @@ zget_space(
         vm_offset_t     *result)
 {
         vm_offset_t     new_space = 0;
-        vm_size_t       space_to_add;
+        vm_size_t       space_to_add = 0;
 
         simple_lock(&zget_space_lock);
         while ((zalloc_next_space + size) > zalloc_end_of_space) {
@@ -501,7 +520,7 @@
 void zone_steal_memory(void)
 {
         zdata_size = round_page(128*sizeof(struct zone));
-        zdata = pmap_steal_memory(zdata_size);
+        zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
 }
 
@@ -532,7 +551,7 @@ zfill(
                 return 0;
 
         zone_change(zone, Z_FOREIGN, TRUE);
-        zcram(zone, memory, size);
+        zcram(zone, (void *)memory, size);
         nalloc = size / zone->elem_size;
         assert(nalloc >= nelem);
 
@@ -550,13 +569,13 @@ zone_bootstrap(void)
         vm_size_t zone_zone_size;
         vm_offset_t zone_zone_space;
 
-        simple_lock_init(&all_zones_lock, ETAP_MISC_ZONE_ALL);
+        simple_lock_init(&all_zones_lock, 0);
 
         first_zone = ZONE_NULL;
         last_zone = &first_zone;
         num_zones = 0;
 
-        simple_lock_init(&zget_space_lock, ETAP_MISC_ZONE_GET);
+        simple_lock_init(&zget_space_lock, 0);
         zalloc_next_space = zdata;
         zalloc_end_of_space = zdata + zdata_size;
         zalloc_wasted_space = 0;
@@ -568,7 +587,7 @@ zone_bootstrap(void)
         zone_change(zone_zone, Z_COLLECT, FALSE);
         zone_zone_size = zalloc_end_of_space - zalloc_next_space;
         zget_space(zone_zone_size, &zone_zone_space);
-        zcram(zone_zone, zone_zone_space, zone_zone_size);
+        zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
 }
 
 void
@@ -581,24 +600,24 @@ zone_init(
         vm_size_t       zone_table_size;
 
         retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
-                               FALSE, TRUE, &zone_map);
+                               FALSE, VM_FLAGS_ANYWHERE, &zone_map);
+
         if (retval != KERN_SUCCESS)
                 panic("zone_init: kmem_suballoc failed");
         zone_max = zone_min + round_page(max_zonemap_size);
         /*
          * Setup garbage collection information:
         */
-        zone_table_size = atop(zone_max - zone_min) *
+        zone_table_size = atop_32(zone_max - zone_min) *
                                 sizeof(struct zone_page_table_entry);
         if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
                              zone_table_size) != KERN_SUCCESS)
                 panic("zone_init");
         zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
-        zone_pages = atop(zone_max - zone_min);
+        zone_pages = atop_32(zone_max - zone_min);
         zone_map_min_address = zone_min;
         zone_map_max_address = zone_max;
-        simple_lock_init(&zone_page_table_lock, ETAP_MISC_ZONE_PTABLE);
-        mutex_init(&zone_gc_lock, ETAP_NO_TRACE);
+        mutex_init(&zone_gc_lock, 0);
         zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
 }
 
@@ -606,7 +625,7 @@ zone_init(
 /*
  *      zalloc returns an element from the specified zone.
  */
-vm_offset_t
+void *
 zalloc_canblock(
         register zone_t zone,
         boolean_t canblock)
 {
@@ -615,12 +634,17 @@ zalloc_canblock(
         kern_return_t retval;
 
         assert(zone != ZONE_NULL);
-        check_simple_locks();
 
         lock_zone(zone);
 
         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 
+        while ((addr == 0) && canblock && (zone->doing_gc)) {
+                zone->waiting = TRUE;
+                zone_sleep(zone);
+                REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+        }
+
         while ((addr == 0) && canblock) {
                 /*
                  *      If nothing was there, try to get more
@@ -662,24 +686,33 @@ zalloc_canblock(
                         if (zone->collectable) {
                                 vm_offset_t space;
                                 vm_size_t alloc_size;
-
-                                if (vm_pool_low())
-                                        alloc_size =
-                                          round_page(zone->elem_size);
-                                else
-                                        alloc_size = zone->alloc_size;
-
-                                retval = kernel_memory_allocate(zone_map,
-                                        &space, alloc_size, 0,
-                                        KMA_KOBJECT|KMA_NOPAGEWAIT);
-                                if (retval == KERN_SUCCESS) {
-                                        zone_page_init(space, alloc_size,
-                                                ZONE_PAGE_USED);
-                                        zcram(zone, space, alloc_size);
-                                } else if (retval != KERN_RESOURCE_SHORTAGE) {
-                                        /* would like to cause a zone_gc() */
-
-                                        panic("zalloc");
+                                boolean_t retry = FALSE;
+
+                                for (;;) {
+
+                                        if (vm_pool_low() || retry == TRUE)
+                                                alloc_size =
+                                                  round_page(zone->elem_size);
+                                        else
+                                                alloc_size = zone->alloc_size;
+
+                                        retval = kernel_memory_allocate(zone_map,
+                                                &space, alloc_size, 0,
+                                                KMA_KOBJECT|KMA_NOPAGEWAIT);
+                                        if (retval == KERN_SUCCESS) {
+                                                zone_page_init(space, alloc_size,
+                                                        ZONE_PAGE_USED);
+                                                zcram(zone, (void *)space, alloc_size);
+
+                                                break;
+                                        } else if (retval != KERN_RESOURCE_SHORTAGE) {
+                                                /* would like to cause a zone_gc() */
+                                                if (retry == TRUE)
+                                                        panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
+                                                retry = TRUE;
+                                        } else {
+                                                break;
+                                        }
                                 }
                                 lock_zone(zone);
                                 zone->doing_alloc = FALSE;
@@ -717,9 +750,9 @@ zalloc_canblock(
                                 zone_page_alloc(space, zone->elem_size);
 #if ZONE_DEBUG
                                 if (zone_debug_enabled(zone))
-                                        space += sizeof(queue_chain_t);
+                                        space += ZONE_DEBUG_OFFSET;
 #endif
-                                return(space);
+                                return((void *)space);
                         }
                         if (retval == KERN_RESOURCE_SHORTAGE) {
                                 unlock_zone(zone);
 
                                 VM_PAGE_WAIT();
                                 lock_zone(zone);
                         } else {
-                                panic("zalloc");
+                                panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
                         }
                 }
         }
@@ -746,24 +779,24 @@ zalloc_canblock(
 #if ZONE_DEBUG
         if (addr && zone_debug_enabled(zone)) {
                 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
-                addr += sizeof(queue_chain_t);
+                addr += ZONE_DEBUG_OFFSET;
         }
 #endif
 
         unlock_zone(zone);
-        return(addr);
+        return((void *)addr);
 }
 
 
-vm_offset_t
+void *
 zalloc(
        register zone_t zone)
 {
   return( zalloc_canblock(zone, TRUE) );
 }
 
-vm_offset_t
+void *
 zalloc_noblock(
                register zone_t zone)
 {
@@ -772,10 +805,10 @@ zalloc_noblock(
 
 void
 zalloc_async(
-        thread_call_param_t     p0,
-        thread_call_param_t     p1)
+        thread_call_param_t          p0,
+        __unused thread_call_param_t p1)
 {
-        vm_offset_t     elt;
+        void *elt;
 
         elt = zalloc_canblock((zone_t)p0, TRUE);
         zfree((zone_t)p0, elt);
@@ -792,7 +825,7 @@ zalloc_async(
  *      This form should be used when you can not block (like when
  *      processing an interrupt).
  */
-vm_offset_t
+void *
 zget(
         register zone_t zone)
 {
@@ -801,29 +834,33 @@ zget(
         assert( zone != ZONE_NULL );
 
         if (!lock_try_zone(zone))
-            return ((vm_offset_t)0);
+                return NULL;
 
         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 #if ZONE_DEBUG
         if (addr && zone_debug_enabled(zone)) {
                 enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
-                addr += sizeof(queue_chain_t);
+                addr += ZONE_DEBUG_OFFSET;
         }
 #endif  /* ZONE_DEBUG */
         unlock_zone(zone);
-        return(addr);
+        return((void *) addr);
 }
 
 /* Keep this FALSE by default.  Large memory machine run orders of magnitude
    slower in debug mode when true.  Use debugger to enable if needed */
-boolean_t zone_check = FALSE;
+/* static */ boolean_t zone_check = FALSE;
+
+static zone_t zone_last_bogus_zone = ZONE_NULL;
+static vm_offset_t zone_last_bogus_elem = 0;
 
 void
 zfree(
         register zone_t zone,
-        vm_offset_t     elem)
+        void            *addr)
 {
+        vm_offset_t     elem = (vm_offset_t) addr;
 
 #if MACH_ASSERT
         /* Basic sanity checks */
@@ -832,17 +869,24 @@ zfree(
         /* zone_gc assumes zones are never freed */
         if (zone == zone_zone)
                 panic("zfree: freeing to zone_zone breaks zone_gc!");
+#endif
+
         if (zone->collectable && !zone->allows_foreign &&
-            (!from_zone_map(elem) || !from_zone_map(elem+zone->elem_size-1)))
+            !from_zone_map(elem, zone->elem_size)) {
+#if MACH_ASSERT
                 panic("zfree: non-allocated memory in collectable zone!");
 #endif
+                zone_last_bogus_zone = zone;
+                zone_last_bogus_elem = elem;
+                return;
+        }
 
         lock_zone(zone);
 #if ZONE_DEBUG
         if (zone_debug_enabled(zone)) {
                 queue_t tmp_elem;
 
-                elem -= sizeof(queue_chain_t);
+                elem -= ZONE_DEBUG_OFFSET;
                 if (zone_check) {
                         /* check the zone's consistency */
@@ -953,68 +997,34 @@ zprealloc(
                 if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
                         panic("zprealloc");
                 zone_page_init(addr, size, ZONE_PAGE_USED);
-                zcram(zone, addr, size);
+                zcram(zone, (void *)addr, size);
         }
 }
 
 /*
  *      Zone garbage collection subroutines
- *
- *      These routines have in common the modification of entries in the
- *      zone_page_table.  The latter contains one entry for every page
- *      in the zone_map.
- *
- *      For each page table entry in the given range:
- *
- *              zone_page_collectable   - test if one (in_free_list == alloc_count)
- *              zone_page_keep          - reset in_free_list
- *              zone_page_in_use        - decrements in_free_list
- *              zone_page_free          - increments in_free_list
- *              zone_page_init          - initializes in_free_list and alloc_count
- *              zone_page_alloc         - increments alloc_count
- *              zone_page_dealloc       - decrements alloc_count
- *              zone_add_free_page_list - adds the page to the free list
- *
- *      Two counts are maintained for each page, the in_free_list count and
- *      alloc_count.  The alloc_count is how many zone elements have been
- *      allocated from a page.  (Note that the page could contain elements
- *      that span page boundaries.  The count includes these elements so
- *      one element may be counted in two pages.) In_free_list is a count
- *      of how many zone elements are currently free.  If in_free_list is
- *      equal to alloc_count then the page is eligible for garbage
- *      collection.
- *
- *      Alloc_count and in_free_list are initialized to the correct values
- *      for a particular zone when a page is zcram'ed into a zone.  Subsequent
- *      gets and frees of zone elements will call zone_page_in_use and
- *      zone_page_free which modify the in_free_list count.  When the zones
- *      garbage collector runs it will walk through a zones free element list,
- *      remove the elements that reside on collectable pages, and use
- *      zone_add_free_page_list to create a list of pages to be collected.
  */
+
 boolean_t
 zone_page_collectable(
         vm_offset_t     addr,
         vm_size_t       size)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+        if (!from_zone_map(addr, size))
                 panic("zone_page_collectable");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                if (zone_page_table[i].in_free_list ==
-                    zone_page_table[i].alloc_count) {
-                        unlock_zone_page_table();
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
+
+        for (zp = zone_page_table + i; i <= j; zp++, i++)
+                if (zp->collect_count == zp->alloc_count)
                         return (TRUE);
-                }
-        }
-        unlock_zone_page_table();
+
         return (FALSE);
 }
 
@@ -1023,64 +1033,39 @@ zone_page_keep(
         vm_offset_t     addr,
         vm_size_t       size)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+        if (!from_zone_map(addr, size))
                 panic("zone_page_keep");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                zone_page_table[i].in_free_list = 0;
-        }
-        unlock_zone_page_table();
-}
-
-void
-zone_page_in_use(
-        vm_offset_t     addr,
-        vm_size_t       size)
-{
-        natural_t i, j;
-
-#if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-                panic("zone_page_in_use");
-#endif
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                if (zone_page_table[i].in_free_list > 0)
-                        zone_page_table[i].in_free_list--;
-        }
-        unlock_zone_page_table();
+        for (zp = zone_page_table + i; i <= j; zp++, i++)
+                zp->collect_count = 0;
 }
 
 void
-zone_page_free(
+zone_page_collect(
         vm_offset_t     addr,
         vm_size_t       size)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-                panic("zone_page_free");
+        if (!from_zone_map(addr, size))
+                panic("zone_page_collect");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                assert(zone_page_table[i].in_free_list >= 0);
-                zone_page_table[i].in_free_list++;
-        }
-        unlock_zone_page_table();
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
+
+        for (zp = zone_page_table + i; i <= j; zp++, i++)
+                ++zp->collect_count;
 }
 
 void
@@ -1089,21 +1074,21 @@ zone_page_init(
         vm_size_t       size,
         int             value)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+        if (!from_zone_map(addr, size))
                 panic("zone_page_init");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                zone_page_table[i].alloc_count = value;
-                zone_page_table[i].in_free_list = 0;
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
+
+        for (zp = zone_page_table + i; i <= j; zp++, i++) {
+                zp->alloc_count = value;
+                zp->collect_count = 0;
         }
-        unlock_zone_page_table();
 }
 
 void
@@ -1111,85 +1096,73 @@ zone_page_alloc(
         vm_offset_t     addr,
         vm_size_t       size)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
+        if (!from_zone_map(addr, size))
                 panic("zone_page_alloc");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                /* Set alloc_count to (ZONE_PAGE_USED + 1) if
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
+
+        for (zp = zone_page_table + i; i <= j; zp++, i++) {
+                /*
+                 * Set alloc_count to (ZONE_PAGE_USED + 1) if
                  * it was previously set to ZONE_PAGE_UNUSED.
                  */
-                if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
-                        zone_page_table[i].alloc_count = 1;
-                } else {
-                        zone_page_table[i].alloc_count++;
-                }
+                if (zp->alloc_count == ZONE_PAGE_UNUSED)
+                        zp->alloc_count = 1;
+                else
+                        ++zp->alloc_count;
         }
-        unlock_zone_page_table();
 }
 
 void
-zone_page_dealloc(
+zone_page_free_element(
+        struct zone_page_table_entry    **free_pages,
         vm_offset_t     addr,
         vm_size_t       size)
 {
+        struct zone_page_table_entry    *zp;
         natural_t i, j;
 
 #if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-                panic("zone_page_dealloc");
+        if (!from_zone_map(addr, size))
+                panic("zone_page_free_element");
 #endif
 
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                zone_page_table[i].alloc_count--;
-        }
-        unlock_zone_page_table();
-}
+        i = atop_32(addr-zone_map_min_address);
+        j = atop_32((addr+size-1) - zone_map_min_address);
 
-void
-zone_add_free_page_list(
-        struct zone_page_table_entry    **free_list,
-        vm_offset_t     addr,
-        vm_size_t       size)
-{
-        natural_t i, j;
+        for (zp = zone_page_table + i; i <= j; zp++, i++) {
+                if (zp->collect_count > 0)
+                        --zp->collect_count;
+                if (--zp->alloc_count == 0) {
+                        zp->alloc_count  = ZONE_PAGE_UNUSED;
+                        zp->collect_count = 0;
 
-#if MACH_ASSERT
-        if (!from_zone_map(addr) || !from_zone_map(addr+size-1))
-                panic("zone_add_free_page_list");
-#endif
-
-        i = atop(addr-zone_map_min_address);
-        j = atop((addr+size-1) - zone_map_min_address);
-        lock_zone_page_table();
-        for (; i <= j; i++) {
-                if (zone_page_table[i].alloc_count == 0) {
-                        zone_page_table[i].next = *free_list;
-                        *free_list = &zone_page_table[i];
-                        zone_page_table[i].alloc_count  = ZONE_PAGE_UNUSED;
-                        zone_page_table[i].in_free_list = 0;
+                        zp->link = *free_pages;
+                        *free_pages = zp;
                 }
         }
-        unlock_zone_page_table();
 }
 
 
 /* This is used for walking through a zone's free element list.
  */
-struct zone_free_entry {
-        struct zone_free_entry * next;
+struct zone_free_element {
+        struct zone_free_element *      next;
 };
 
-int reclaim_page_count = 0;
+struct {
+        uint32_t        pgs_freed;
+
+        uint32_t        elems_collected,
+                        elems_freed,
+                        elems_kept;
+} zgc_stats;
 
 /*      Zone garbage collection
  *
@@ -1202,35 +1175,28 @@
 void
 zone_gc(void)
 {
         unsigned int    max_zones;
-        zone_t          z;
+        zone_t                  z;
         unsigned int    i;
-        struct zone_page_table_entry    *freep;
-        struct zone_page_table_entry    *zone_free_page_list;
+        struct zone_page_table_entry    *zp, *zone_free_pages;
 
         mutex_lock(&zone_gc_lock);
 
-        /*
-         * Note that this scheme of locking only to walk the zone list
-         * assumes that zones are never freed (checked by zfree)
-         */
         simple_lock(&all_zones_lock);
         max_zones = num_zones;
         z = first_zone;
         simple_unlock(&all_zones_lock);
 
 #if MACH_ASSERT
-        lock_zone_page_table();
         for (i = 0; i < zone_pages; i++)
-                assert(zone_page_table[i].in_free_list == 0);
-        unlock_zone_page_table();
+                assert(zone_page_table[i].collect_count == 0);
 #endif /* MACH_ASSERT */
 
-        zone_free_page_list = (struct zone_page_table_entry *) 0;
+        zone_free_pages = NULL;
 
         for (i = 0; i < max_zones; i++, z = z->next_zone) {
-                struct zone_free_entry * prev;
-                struct zone_free_entry * elt;
-                struct zone_free_entry * end;
+                unsigned int    n, m;
+                vm_size_t       elt_size, size_freed;
+                struct zone_free_element        *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
 
                 assert(z != ZONE_NULL);
 
@@ -1239,82 +1205,213 @@ zone_gc(void)
 
                 lock_zone(z);
 
+                elt_size = z->elem_size;
+
                 /*
                  * Do a quick feasability check before we scan the zone:
-                 * skip unless there is likelihood of getting 1+ pages back.
+                 * skip unless there is likelihood of getting pages back
+                 * (i.e we need a whole allocation block's worth of free
+                 * elements before we can garbage collect) and
+                 * the zone has more than 10 percent of it's elements free
                  */
-                if ((z->cur_size - z->count * z->elem_size) <= (2*PAGE_SIZE)){
+                if (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
+                    ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10))) {
                         unlock_zone(z);
                         continue;
                 }
 
-                /* Count the free elements in each page.  This loop
-                 * requires that all in_free_list entries are zero.
-                 *
-                 * Exit the loop early if we need to hurry up and drop
-                 * the lock to allow preemption - but we must fully process
-                 * all elements we looked at so far.
+                z->doing_gc = TRUE;
+
+                /*
+                 * Snatch all of the free elements away from the zone.
                  */
-                elt = (struct zone_free_entry *)(z->free_elements);
-                while (!ast_urgency() && (elt != (struct zone_free_entry *)0)) {
-                        if (from_zone_map(elt))
-                                zone_page_free((vm_offset_t)elt, z->elem_size);
-                        elt = elt->next;
-                }
-                end = elt;
 
-                /* Now determine which elements should be removed
-                 * from the free list and, after all the elements
-                 * on a page have been removed, add the element's
-                 * page to a list of pages to be freed.
+                scan = (void *)z->free_elements;
+                z->free_elements = 0;
+
+                unlock_zone(z);
+
+                /*
+                 * Pass 1:
+                 *
+                 * Determine which elements we can attempt to collect
+                 * and count them up in the page table.  Foreign elements
+                 * are returned to the zone.
                 */
-                prev = elt = (struct zone_free_entry *)(z->free_elements);
-                while (elt != end) {
-                        if (!from_zone_map(elt)) {
+
+                prev = (void *)&scan;
+                elt = scan;
+                n = 0; tail = keep = NULL;
+                while (elt != NULL) {
+                        if (from_zone_map(elt, elt_size)) {
+                                zone_page_collect((vm_offset_t)elt, elt_size);
+
                                 prev = elt;
                                 elt = elt->next;
-                                continue;
+
+                                ++zgc_stats.elems_collected;
                         }
-                        if (zone_page_collectable((vm_offset_t)elt,
-                                                  z->elem_size)) {
-                                z->cur_size -= z->elem_size;
-                                zone_page_in_use((vm_offset_t)elt,
-                                                 z->elem_size);
-                                zone_page_dealloc((vm_offset_t)elt,
-                                                  z->elem_size);
-                                zone_add_free_page_list(&zone_free_page_list,
-                                                        (vm_offset_t)elt,
-                                                        z->elem_size);
-                                if (elt == prev) {
-                                        elt = elt->next;
-                                        z->free_elements =(vm_offset_t)elt;
-                                        prev = elt;
-                                } else {
-                                        prev->next = elt->next;
-                                        elt = elt->next;
+                        else {
+                                if (keep == NULL)
+                                        keep = tail = elt;
+                                else
+                                        tail = tail->next = elt;
+
+                                elt = prev->next = elt->next;
+                                tail->next = NULL;
+                        }
+
+                        /*
+                         * Dribble back the elements we are keeping.
+                         */
+
+                        if (++n >= 50) {
+                                if (z->waiting == TRUE) {
+                                        lock_zone(z);
+
+                                        if (keep != NULL) {
+                                                tail->next = (void *)z->free_elements;
+                                                z->free_elements = (vm_offset_t) keep;
+                                                tail = keep = NULL;
+                                        } else {
+                                                m =0;
+                                                base_elt = elt;
+                                                base_prev = prev;
+                                                while ((elt != NULL) && (++m < 50)) {
+                                                        prev = elt;
+                                                        elt = elt->next;
+                                                }
+                                                if (m !=0 ) {
+                                                        prev->next = (void *)z->free_elements;
+                                                        z->free_elements = (vm_offset_t) base_elt;
+                                                        base_prev->next = elt;
+                                                        prev = base_prev;
+                                                }
+                                        }
+
+                                        if (z->waiting) {
+                                                z->waiting = FALSE;
+                                                zone_wakeup(z);
+                                        }
+
+                                        unlock_zone(z);
                                 }
-                        } else {
-                                /* This element is not eligible for collection
-                                 * so clear in_free_list in preparation for a
-                                 * subsequent garbage collection pass.
-                                 */
-                                zone_page_keep((vm_offset_t)elt, z->elem_size);
-                                prev = elt;
-                                elt = elt->next;
+                                n =0;
                         }
-                } /* end while(elt != end) */
+                }
+
+                /*
+                 * Return any remaining elements.
+                 */
+
+                if (keep != NULL) {
+                        lock_zone(z);
+
+                        tail->next = (void *)z->free_elements;
+                        z->free_elements = (vm_offset_t) keep;
+
+                        unlock_zone(z);
+                }
+
+                /*
+                 * Pass 2:
+                 *
+                 * Determine which pages we can reclaim and
+                 * free those elements.
+                 */
+
+                size_freed = 0;
+                prev = (void *)&scan;
+                elt = scan;
+                n = 0; tail = keep = NULL;
+                while (elt != NULL) {
+                        if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
+                                size_freed += elt_size;
+                                zone_page_free_element(&zone_free_pages,
+                                                        (vm_offset_t)elt, elt_size);
+
+                                elt = prev->next = elt->next;
+
+                                ++zgc_stats.elems_freed;
+                        }
+                        else {
+                                zone_page_keep((vm_offset_t)elt, elt_size);
+
+                                if (keep == NULL)
+                                        keep = tail = elt;
+                                else
+                                        tail = tail->next = elt;
+
+                                elt = prev->next = elt->next;
+                                tail->next = NULL;
+
+                                ++zgc_stats.elems_kept;
+                        }
+
+                        /*
+                         * Dribble back the elements we are keeping,
+                         * and update the zone size info.
+                         */
+
+                        if (++n >= 50) {
+                                lock_zone(z);
+
+                                z->cur_size -= size_freed;
+                                size_freed = 0;
+
+                                if (keep != NULL) {
+                                        tail->next = (void *)z->free_elements;
+                                        z->free_elements = (vm_offset_t) keep;
+                                }
+
+                                if (z->waiting) {
+                                        z->waiting = FALSE;
+                                        zone_wakeup(z);
+                                }
+
+                                unlock_zone(z);
+
+                                n = 0; tail = keep = NULL;
+                        }
+                }
+
+                /*
+                 * Return any remaining elements, and update
+                 * the zone size info.
+                 */
+
+                lock_zone(z);
+
+                if (size_freed > 0 || keep != NULL) {
+
+                        z->cur_size -= size_freed;
+
+                        if (keep != NULL) {
+                                tail->next = (void *)z->free_elements;
+                                z->free_elements = (vm_offset_t) keep;
+                        }
+
+                }
+
+                z->doing_gc = FALSE;
+                if (z->waiting) {
+                        z->waiting = FALSE;
+                        zone_wakeup(z);
+                }
                 unlock_zone(z);
         }
 
-        for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
-                vm_offset_t     free_addr;
+        /*
+         * Reclaim the pages we are freeing.
+         */
 
-                free_addr = zone_map_min_address +
-                        PAGE_SIZE * (freep - zone_page_table);
-                kmem_free(zone_map, free_addr, PAGE_SIZE);
-                reclaim_page_count++;
+        while ((zp = zone_free_pages) != NULL) {
+                zone_free_pages = zp->link;
+                kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
+                                (zp - zone_page_table), PAGE_SIZE);
+                ++zgc_stats.pgs_freed;
         }
+
         mutex_unlock(&zone_gc_lock);
 }
 
@@ -1329,11 +1426,11 @@ consider_zone_gc(void)
 {
         /*
          *      By default, don't attempt zone GC more frequently
-         *      than once a second.
+         *      than once / 1 minutes.
          */
 
         if (zone_gc_max_rate == 0)
-                zone_gc_max_rate = (1 << SCHED_TICK_SHIFT) + 1;
+                zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
 
         if (zone_gc_allowed &&
             ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
@@ -1344,14 +1441,6 @@ consider_zone_gc(void)
         }
 }
 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
 
 kern_return_t
 host_zone_info(
@@ -1385,14 +1474,14 @@ host_zone_info(
 #ifdef ppc
         max_zones = num_zones + 4;
 #else
-        max_zones = num_zones + 2;
+        max_zones = num_zones + 3;  /* ATN: count the number below!! */
 #endif
         z = first_zone;
         simple_unlock(&all_zones_lock);
 
         if (max_zones <= *namesCntp) {
                 /* use in-line memory */
-
+                names_size = *namesCntp * sizeof *names;
                 names = *namesp;
         } else {
                 names_size = round_page(max_zones * sizeof *names);
@@ -1405,7 +1494,7 @@ host_zone_info(
 
         if (max_zones <= *infoCntp) {
                 /* use in-line memory */
-
+                info_size = *infoCntp * sizeof *info;
                 info = *infop;
         } else {
                 info_size = round_page(max_zones * sizeof *info);
@@ -1469,6 +1558,15 @@ host_zone_info(
                 zn++;
                 zi++;
 #endif
+
+#ifdef i386
+        strcpy(zn->zn_name, "page_tables");
+        pt_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
+                          &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
+        zn++;
+        zi++;
+#endif
+
         strcpy(zn->zn_name, "kalloc.large");
         kalloc_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size,
                               &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible);
@@ -1482,8 +1580,8 @@ host_zone_info(
         if (used != names_size)
                 bzero((char *) (names_addr + used), names_size - used);
 
-        kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
-                           TRUE, &copy);
+        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
+                           (vm_map_size_t)names_size, TRUE, &copy);
         assert(kr == KERN_SUCCESS);
 
         *namesp = (zone_name_t *) copy;
@@ -1499,8 +1597,8 @@ host_zone_info(
         if (used != info_size)
                 bzero((char *) (info_addr + used), info_size - used);
 
-        kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
-                           TRUE, &copy);
+        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
+                           (vm_map_size_t)info_size, TRUE, &copy);
         assert(kr == KERN_SUCCESS);
 
         *infop = (zone_info_t *) copy;
@@ -1554,12 +1652,12 @@ db_print_zone(
 /*ARGSUSED*/
 void
 db_show_one_zone(
-        db_expr_t       addr,
-        int             have_addr,
-        db_expr_t       count,
-        char *          modif)
+        db_expr_t               addr,
+        int             have_addr,
+        __unused db_expr_t      count,
+        __unused char *         modif)
 {
-        struct zone *z = (zone_t)addr;
+        struct zone *z = (zone_t)((char *)0 + addr);
 
         if (z == ZONE_NULL || !have_addr){
                 db_error("No Zone\n");
@@ -1573,10 +1671,10 @@ db_show_one_zone(
 /*ARGSUSED*/
 void
 db_show_all_zones(
-        db_expr_t       addr,
-        int             have_addr,
-        db_expr_t       count,
-        char *          modif)
+        __unused db_expr_t      addr,
+        int             have_addr,
+        db_expr_t       count,
+        __unused char *         modif)
 {
         zone_t          z;
         unsigned        total = 0;
@@ -1608,8 +1706,7 @@ db_show_all_zones(
                 }
         }
         db_printf("\nTotal %8x", total);
-        db_printf("\n\nzone_gc() has reclaimed %d pages\n",
-                  reclaim_page_count);
+        db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
 }
 
 #if     ZONE_DEBUG
@@ -1724,33 +1821,35 @@ db_zone_print_free(
 /* should we care about locks here ? */
 
 #if     MACH_KDB
-vm_offset_t
+void *
 next_element(
         zone_t          z,
-        vm_offset_t     elt)
+        void            *prev)
 {
+        char            *elt = (char *)prev;
+
         if (!zone_debug_enabled(z))
                 return(0);
-        elt -= sizeof(queue_chain_t);
-        elt = (vm_offset_t) queue_next((queue_t) elt);
+        elt -= ZONE_DEBUG_OFFSET;
+        elt = (char *) queue_next((queue_t) elt);
         if ((queue_t) elt == &z->active_zones)
                 return(0);
-        elt += sizeof(queue_chain_t);
+        elt += ZONE_DEBUG_OFFSET;
         return(elt);
 }
 
-vm_offset_t
+void *
 first_element(
         zone_t          z)
 {
-        vm_offset_t     elt;
+        char            *elt;
 
         if (!zone_debug_enabled(z))
                 return(0);
         if (queue_empty(&z->active_zones))
                 return(0);
-        elt = (vm_offset_t) queue_first(&z->active_zones);
-        elt += sizeof(queue_chain_t);
+        elt = (char *)queue_first(&z->active_zones);
+        elt += ZONE_DEBUG_OFFSET;
         return(elt);
 }
 
@@ -1765,7 +1864,7 @@ zone_count(
         zone_t          z,
         int             tail)
 {
-        vm_offset_t     elt;
+        void            *elt;
         int             count = 0;
         boolean_t       print = (tail != 0);
 
@@ -1791,10 +1890,10 @@ zone_debug_enable(
         zone_t          z)
 {
         if (zone_debug_enabled(z) || zone_in_use(z) ||
-            z->alloc_size < (z->elem_size + sizeof(queue_chain_t)))
+            z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
                 return;
         queue_init(&z->active_zones);
-        z->elem_size += sizeof(queue_chain_t);
+        z->elem_size += ZONE_DEBUG_OFFSET;
 }
 
 void
@@ -1803,7 +1902,7 @@ zone_debug_disable(
 {
         if (!zone_debug_enabled(z) || zone_in_use(z))
                 return;
-        z->elem_size -= sizeof(queue_chain_t);
+        z->elem_size -= ZONE_DEBUG_OFFSET;
         z->active_zones.next = z->active_zones.prev = 0;
 }
 #endif  /* ZONE_DEBUG */