- /*
- * Pass 1:
- *
- * Determine which elements we can attempt to collect
- * and count them up in the page table. Foreign elements
- * are returned to the zone.
- */
-
- prev = (void *)&scan;
- elt = scan;
- n = 0; tail = keep = NULL;
-
- zone_free_page_head = ZONE_PAGE_INDEX_INVALID;
- zone_free_page_tail = ZONE_PAGE_INDEX_INVALID;
-
-
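- /*
-  * "keep"/"tail" collect the foreign elements that are handed back to the
-  * zone, and "n" counts elements examined since the last dribble-back.
-  */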
- while (elt != NULL) {
- if (from_zone_map(elt, elt_size)) {
- zone_page_collect((vm_offset_t)elt, elt_size);
-
- prev = elt;
- elt = elt->next;
-
- ++zgc_stats.elems_collected;
- }
- else {
- if (keep == NULL)
- keep = tail = elt;
- else {
- append_zone_element(z, tail, elt);
- tail = elt;
- }
-
- append_zone_element(z, prev, elt->next);
- elt = elt->next;
- append_zone_element(z, tail, NULL);
- }
-
- /*
- * Dribble back the elements we are keeping.
- * If there are none, give some elements that we haven't looked at yet
- * back to the freelist so that others waiting on the zone don't get stuck
- * for too long. This might prevent us from recovering some memory,
- * but allows us to avoid having to allocate new memory to serve requests
- * while zone_gc has all the free memory tied up.
- * <rdar://problem/3893406>
- */
-
- if (++n >= 50) {
- if (z->waiting == TRUE) {
- /* z->waiting checked without lock held, rechecked below after locking */
- lock_zone(z);
-
- if (keep != NULL) {
- add_list_to_zone(z, keep, tail);
- tail = keep = NULL;
- } else {
- m = 0;
- base_elt = elt;
- base_prev = prev;
- while ((elt != NULL) && (++m < 50)) {
- prev = elt;
- elt = elt->next;
- }
- if (m != 0) {
- /* Extract the elements from the list and
- * give them back */
- append_zone_element(z, prev, NULL);
- add_list_to_zone(z, base_elt, prev);
- append_zone_element(z, base_prev, elt);
- prev = base_prev;
- }
- }
-
- if (z->waiting) {
- z->waiting = FALSE;
- zone_wakeup(z);
- }
-
- unlock_zone(z);
- }
- n = 0;
- }
- }
-
- /*
- * Return any remaining elements.
- */
-
- if (keep != NULL) {
- lock_zone(z);
-
- add_list_to_zone(z, keep, tail);
-
- if (z->waiting) {
- z->waiting = FALSE;
- zone_wakeup(z);
- }
-
- unlock_zone(z);
- }
-
- /*
- * Pass 2:
- *
- * Determine which pages we can reclaim and
- * free those elements.
- */
-
- size_freed = 0;
- elt = scan;
- n = 0; tail = keep = NULL;
-
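- /*
-  * Re-walk the captured list: elements whose pages were counted as fully
-  * collectable in pass 1 are freed, everything else is kept and dribbled
-  * back to the zone.
-  */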
- while (elt != NULL) {
- if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
- struct zone_free_element *next_elt = elt->next;
-
- size_freed += elt_size;
-
- /*
- * If this is the last allocation on the page(s),
- * we may use their storage to maintain the linked
- * list of free-able pages. So store elt->next because
- * "elt" may be scribbled over.
- */
- zone_page_free_element(&zone_free_page_head, &zone_free_page_tail, (vm_offset_t)elt, elt_size);
-
- elt = next_elt;
-
- ++zgc_stats.elems_freed;
- }
- else {
- zone_page_keep((vm_offset_t)elt, elt_size);
-
- if (keep == NULL)
- keep = tail = elt;
- else {
- append_zone_element(z, tail, elt);
- tail = elt;
- }
-
- elt = elt->next;
- append_zone_element(z, tail, NULL);
-
- ++zgc_stats.elems_kept;
- }
-
- /*
- * Dribble back the elements we are keeping,
- * and update the zone size info.
- */
-
- if (++n >= 50) {
- lock_zone(z);
-
- z->cur_size -= size_freed;
- z->countfree -= size_freed/elt_size;
- size_freed = 0;
-
- if (keep != NULL) {
- add_list_to_zone(z, keep, tail);
- }
-
- if (z->waiting) {
- z->waiting = FALSE;
- zone_wakeup(z);
- }
-
- unlock_zone(z);
-
- n = 0; tail = keep = NULL;
- }
- }
-
- /*
- * Return any remaining elements, and update
- * the zone size info.
- */
-
- lock_zone(z);
-
- if (size_freed > 0 || keep != NULL) {
-
- z->cur_size -= size_freed;
- z->countfree -= size_freed/elt_size;
-
- if (keep != NULL) {
- add_list_to_zone(z, keep, tail);
- }
-
- }
-
- z->doing_gc = FALSE;
- if (z->waiting) {
- z->waiting = FALSE;
- zone_wakeup(z);
- }
- unlock_zone(z);
-
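- /*
-  * No page in this zone became completely free, so there is nothing
-  * to return with kmem_free(); move on to the next zone.
-  */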
- if (zone_free_page_head == ZONE_PAGE_INDEX_INVALID)
- continue;
-
- /*
- * We don't want to allow eager kernel preemption while holding the
- * various locks taken in the kmem_free() path of execution.
- */
- thread_clear_eager_preempt(mythread);
-
-
- /*
- * This loop counts the number of pages that should be freed by the
- * reclaim loop below, which coalesces adjacent pages into batched kmem_free() calls.
- */
- uint32_t pages_to_free_count = 0;
- vm_address_t fpa;
- zone_page_index_t index;
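- /*
-  * The first word of each page on the free list holds the index of the
-  * next free page (or ZONE_PAGE_INDEX_INVALID), so the list is walked
-  * without any extra storage.
-  */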
- for (index = zone_free_page_head; index != ZONE_PAGE_INDEX_INVALID;) {
- pages_to_free_count++;
- fpa = zone_map_min_address + PAGE_SIZE * ((vm_size_t)index);
- index = *(zone_page_index_t *)fpa;
- }
-
- /*
- * Reclaim the pages we are freeing.
- */
- while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
- zone_page_index_t zind = zone_free_page_head;
- vm_address_t free_page_address;
- int page_count;
-
- /*
- * Use the first word of the page about to be freed to find the next free page
- */
- free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind);
- zone_free_page_head = *(zone_page_index_t *)free_page_address;
-
- page_count = 1;
- total_freed_pages++;
-
- while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
- zone_page_index_t next_zind = zone_free_page_head;
- vm_address_t next_free_page_address;
-
- next_free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)next_zind);
-
- if (next_free_page_address == (free_page_address - PAGE_SIZE)) {
- free_page_address = next_free_page_address;
- } else if (next_free_page_address != (free_page_address + (PAGE_SIZE * page_count)))
- break;
-
- zone_free_page_head = *(zone_page_index_t *)next_free_page_address;
- page_count++;
- total_freed_pages++;
- }
- kmem_free(zone_map, free_page_address, page_count * PAGE_SIZE);
- ZONE_PAGE_COUNT_DECR(z, page_count);
- zgc_stats.pgs_freed += page_count;
- pages_to_free_count -= page_count;
-
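- /*
-  * Yield briefly every 32 kmem_free() calls so a long reclaim pass
-  * does not monopolize the CPU.
-  */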
- if (++kmem_frees == 32) {
- thread_yield_internal(1);
- kmem_frees = 0;
- }
- }
-
- /* Check that we actually freed the exact number of pages we were supposed to */
- assert(pages_to_free_count == 0);
-
- if (zalloc_debug & ZALLOC_DEBUG_ZONEGC)
- kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages);
-
- thread_set_eager_preempt(mythread);
- }
-
- if (old_pgs_freed == zgc_stats.pgs_freed)
- zgc_stats.zgc_bailed++;
-
- thread_clear_eager_preempt(mythread);
-
- lck_mtx_unlock(&zone_gc_lock);
-
-}
-
-extern vm_offset_t kmapoff_kaddr;
-extern unsigned int kmapoff_pgcnt;
-
-/*
- * consider_zone_gc:
- *
- * Called by the pageout daemon when the system needs more free pages.
- */
-
-void
-consider_zone_gc(boolean_t force)
-{
- boolean_t all_zones = FALSE;
-
- if (kmapoff_kaddr != 0) {
- /*
- * One-time reclaim of kernel_map resources we allocated in
- * early boot.
- */
- (void) vm_deallocate(kernel_map,
- kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64);
- kmapoff_kaddr = 0;
- }
-
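- /*
-  * Only a throttle-expired GC walks all zones; forced or explicitly
-  * requested collections leave all_zones FALSE.
-  */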
- if (zone_gc_allowed &&
- (zone_gc_allowed_by_time_throttle ||
- zone_gc_forced ||
- force)) {
- if (zone_gc_allowed_by_time_throttle == TRUE) {
- zone_gc_allowed_by_time_throttle = FALSE;
- all_zones = TRUE;
- }
- zone_gc_forced = FALSE;
-
- zone_gc(all_zones);
- }
-}
-
-/*
- * By default, don't attempt zone GC more frequently
- * than once per minute.
- */
-void
-compute_zone_gc_throttle(void *arg __unused)
-{
- zone_gc_allowed_by_time_throttle = TRUE;
-}
-
-
-#if CONFIG_TASK_ZONE_INFO
-
-kern_return_t
-task_zone_info(
- task_t task,
- mach_zone_name_array_t *namesp,
- mach_msg_type_number_t *namesCntp,
- task_zone_info_array_t *infop,
- mach_msg_type_number_t *infoCntp)
-{
- mach_zone_name_t *names;
- vm_offset_t names_addr;
- vm_size_t names_size;
- task_zone_info_t *info;
- vm_offset_t info_addr;
- vm_size_t info_size;
- unsigned int max_zones, i;
- zone_t z;
- mach_zone_name_t *zn;
- task_zone_info_t *zi;
- kern_return_t kr;
-
- vm_size_t used;
- vm_map_copy_t copy;
-
-
- if (task == TASK_NULL)
- return KERN_INVALID_TASK;
-
- /*
- * We assume that zones aren't freed once allocated.
- * We won't pick up any zones that are allocated later.
- */
-
- simple_lock(&all_zones_lock);
- max_zones = (unsigned int)(num_zones + num_fake_zones);
- z = first_zone;
- simple_unlock(&all_zones_lock);
-
- names_size = round_page(max_zones * sizeof *names);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &names_addr, names_size);
- if (kr != KERN_SUCCESS)
- return kr;
- names = (mach_zone_name_t *) names_addr;
-
- info_size = round_page(max_zones * sizeof *info);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &info_addr, info_size);
- if (kr != KERN_SUCCESS) {
- kmem_free(ipc_kernel_map,
- names_addr, names_size);
- return kr;
- }
-
- info = (task_zone_info_t *) info_addr;
-
- zn = &names[0];
- zi = &info[0];
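- /*
-  * Fill in the real zones first; the fake (pseudo) zones are appended
-  * by the second loop below.
-  */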
-
- for (i = 0; i < max_zones - num_fake_zones; i++) {
- struct zone zcopy;
-
- assert(z != ZONE_NULL);
-
- lock_zone(z);
- zcopy = *z;
- unlock_zone(z);
-
- simple_lock(&all_zones_lock);
- z = z->next_zone;
- simple_unlock(&all_zones_lock);
-
- /* assuming here the name data is static */
- (void) strncpy(zn->mzn_name, zcopy.zone_name,
- sizeof zn->mzn_name);
- zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
-
- zi->tzi_count = (uint64_t)zcopy.count;
- zi->tzi_cur_size = (uint64_t)zcopy.cur_size;
- zi->tzi_max_size = (uint64_t)zcopy.max_size;
- zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
- zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
- zi->tzi_sum_size = zcopy.sum_count * zcopy.elem_size;
- zi->tzi_exhaustible = (uint64_t)zcopy.exhaustible;
- zi->tzi_collectable = (uint64_t)zcopy.collectable;
- zi->tzi_caller_acct = (uint64_t)zcopy.caller_acct;
- if (task->tkm_zinfo != NULL) {
- zi->tzi_task_alloc = task->tkm_zinfo[zcopy.index].alloc;
- zi->tzi_task_free = task->tkm_zinfo[zcopy.index].free;
- } else {
- zi->tzi_task_alloc = 0;
- zi->tzi_task_free = 0;
- }
- zn++;
- zi++;
- }
-
- /*
- * loop through the fake zones and fill them using the specialized
- * functions
- */
- for (i = 0; i < num_fake_zones; i++) {
- int count, collectable, exhaustible, caller_acct, index;
- vm_size_t cur_size, max_size, elem_size, alloc_size;
- uint64_t sum_size;
-
- strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
- zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
- fake_zones[i].query(&count, &cur_size,
- &max_size, &elem_size,
- &alloc_size, &sum_size,
- &collectable, &exhaustible, &caller_acct);
- zi->tzi_count = (uint64_t)count;
- zi->tzi_cur_size = (uint64_t)cur_size;
- zi->tzi_max_size = (uint64_t)max_size;
- zi->tzi_elem_size = (uint64_t)elem_size;
- zi->tzi_alloc_size = (uint64_t)alloc_size;
- zi->tzi_sum_size = sum_size;
- zi->tzi_collectable = (uint64_t)collectable;
- zi->tzi_exhaustible = (uint64_t)exhaustible;
- zi->tzi_caller_acct = (uint64_t)caller_acct;
- if (task->tkm_zinfo != NULL) {
- index = ZINFO_SLOTS - num_fake_zones + i;
- zi->tzi_task_alloc = task->tkm_zinfo[index].alloc;
- zi->tzi_task_free = task->tkm_zinfo[index].free;
- } else {
- zi->tzi_task_alloc = 0;
- zi->tzi_task_free = 0;
- }
- zn++;
- zi++;
- }
-
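- /*
-  * Zero the unused tail of each buffer before it is copied out so that
-  * no stale kernel data is passed back with the reply.
-  */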
- used = max_zones * sizeof *names;
- if (used != names_size)
- bzero((char *) (names_addr + used), names_size - used);
-
- kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
- (vm_map_size_t)names_size, TRUE, &copy);
- assert(kr == KERN_SUCCESS);
-
- *namesp = (mach_zone_name_t *) copy;
- *namesCntp = max_zones;
-
- used = max_zones * sizeof *info;
-
- if (used != info_size)
- bzero((char *) (info_addr + used), info_size - used);
-
- kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
- (vm_map_size_t)info_size, TRUE, &copy);
- assert(kr == KERN_SUCCESS);
-
- *infop = (task_zone_info_t *) copy;
- *infoCntp = max_zones;
-
- return KERN_SUCCESS;
-}
-
-#else /* CONFIG_TASK_ZONE_INFO */
-
-kern_return_t
-task_zone_info(
- __unused task_t task,
- __unused mach_zone_name_array_t *namesp,
- __unused mach_msg_type_number_t *namesCntp,
- __unused task_zone_info_array_t *infop,
- __unused mach_msg_type_number_t *infoCntp)
-{
- return KERN_FAILURE;
-}
-
-#endif /* CONFIG_TASK_ZONE_INFO */
-
-kern_return_t
-mach_zone_info(
- host_priv_t host,
- mach_zone_name_array_t *namesp,
- mach_msg_type_number_t *namesCntp,
- mach_zone_info_array_t *infop,
- mach_msg_type_number_t *infoCntp)
-{
- mach_zone_name_t *names;
- vm_offset_t names_addr;
- vm_size_t names_size;
- mach_zone_info_t *info;
- vm_offset_t info_addr;
- vm_size_t info_size;
- unsigned int max_zones, i;
- zone_t z;
- mach_zone_name_t *zn;
- mach_zone_info_t *zi;
- kern_return_t kr;
-
- vm_size_t used;
- vm_map_copy_t copy;
-
-
- if (host == HOST_NULL)
- return KERN_INVALID_HOST;
-#if CONFIG_DEBUGGER_FOR_ZONE_INFO
- if (!PE_i_can_has_debugger(NULL))
- return KERN_INVALID_HOST;
-#endif
-
- /*
- * We assume that zones aren't freed once allocated.
- * We won't pick up any zones that are allocated later.
- */
-
- simple_lock(&all_zones_lock);
- max_zones = (unsigned int)(num_zones + num_fake_zones);
- z = first_zone;
- simple_unlock(&all_zones_lock);