+
+/*
+ * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
+ * local queues if they exist... it's the only spot in the system where we add pages
+ * to those queues... once on those queues, those pages can only move to one of the
+ * global page queues or the free queues... they NEVER move from local q to local q.
+ * the 'local' state is stable when vm_page_queues_remove is called since we're behind
+ * the global vm_page_queue_lock at this point... we still need to take the local lock
+ * in case this operation is being run on a different CPU than the local queue's identity,
+ * but we don't have to worry about the page moving to a global queue or becoming wired
+ * while we're grabbing the local lock since those operations would require the global
+ * vm_page_queue_lock to be held, and we already own it.
+ *
+ * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
+ * 'wired' and 'local' are ALWAYS mutually exclusive conditions.
+ */
+void
+vm_page_queues_remove(vm_page_t mem)
+{
+ boolean_t was_pageable;
+
+ VM_PAGE_QUEUES_ASSERT(mem, 1);
+ assert(!mem->pageout_queue);
+ /*
+ * if (mem->pageout_queue)
+ * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
+ * the caller is responsible for determining if the page is on that queue, and if so, must
+ * either first remove it (it needs both the page queues lock and the object lock to do
+ * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
+ */
+ if (mem->local) {
+ struct vpl *lq;
+ assert(mem->object != kernel_object);
+ assert(mem->object != compressor_object);
+ assert(!mem->inactive && !mem->speculative);
+ assert(!mem->active && !mem->throttled);
+ assert(!mem->clean_queue);
+ assert(!mem->fictitious);
+ lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
+ VPL_LOCK(&lq->vpl_lock);
+ queue_remove(&lq->vpl_queue,
+ mem, vm_page_t, pageq);
+ mem->local = FALSE;
+ mem->local_id = 0;
+ lq->vpl_count--;
+ if (mem->object->internal) {
+ lq->vpl_internal_count--;
+ } else {
+ lq->vpl_external_count--;
+ }
+ VPL_UNLOCK(&lq->vpl_lock);
+ was_pageable = FALSE;
+ }
+
+ else if (mem->active) {
+ assert(mem->object != kernel_object);
+ assert(mem->object != compressor_object);
+ assert(!mem->inactive && !mem->speculative);
+ assert(!mem->clean_queue);
+ assert(!mem->throttled);
+ assert(!mem->fictitious);
+ queue_remove(&vm_page_queue_active,
+ mem, vm_page_t, pageq);
+ mem->active = FALSE;
+ vm_page_active_count--;
+ was_pageable = TRUE;
+ }
+
+ else if (mem->inactive) {
+ assert(mem->object != kernel_object);
+ assert(mem->object != compressor_object);
+ assert(!mem->active && !mem->speculative);
+ assert(!mem->throttled);
+ assert(!mem->fictitious);
+ vm_page_inactive_count--;
+ if (mem->clean_queue) {
+ queue_remove(&vm_page_queue_cleaned,
+ mem, vm_page_t, pageq);
+ mem->clean_queue = FALSE;
+ vm_page_cleaned_count--;
+ } else {
+ if (mem->object->internal) {
+ queue_remove(&vm_page_queue_anonymous,
+ mem, vm_page_t, pageq);
+ vm_page_anonymous_count--;
+ } else {
+ queue_remove(&vm_page_queue_inactive,
+ mem, vm_page_t, pageq);
+ }
+ vm_purgeable_q_advance_all();
+ }
+ mem->inactive = FALSE;
+ was_pageable = TRUE;
+ }
+
+ else if (mem->throttled) {
+ assert(mem->object != compressor_object);
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->speculative);
+ assert(!mem->fictitious);
+ queue_remove(&vm_page_queue_throttled,
+ mem, vm_page_t, pageq);
+ mem->throttled = FALSE;
+ vm_page_throttled_count--;
+ was_pageable = FALSE;
+ }
+
+ else if (mem->speculative) {
+ assert(mem->object != compressor_object);
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->throttled);
+ assert(!mem->fictitious);
+ remque(&mem->pageq);
+ mem->speculative = FALSE;
+ vm_page_speculative_count--;
+ was_pageable = TRUE;
+ }
+
+ else if (mem->pageq.next || mem->pageq.prev) {
+ was_pageable = FALSE;
+ panic("vm_page_queues_remove: unmarked page on Q");
+ } else {
+ was_pageable = FALSE;
+ }
+
+ mem->pageq.next = NULL;
+ mem->pageq.prev = NULL;
+ VM_PAGE_QUEUES_ASSERT(mem, 0);
+ if (was_pageable) {
+ if (mem->object->internal) {
+ vm_page_pageable_internal_count--;
+ } else {
+ vm_page_pageable_external_count--;
+ }
+ }
+}
+
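+/*
+ * vm_page_remove_internal:
+ *
+ * Unlink 'page' from its object's resident page list (memq).  If the page is
+ * currently the object's memq_hint, first move the hint to a neighboring page
+ * (next, else previous, else NULL).
+ */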
+void
+vm_page_remove_internal(vm_page_t page)
+{
+ vm_object_t __object = page->object;
+ if (page == __object->memq_hint) {
+ vm_page_t __new_hint;
+ queue_entry_t __qe;
+ __qe = queue_next(&page->listq);
+ if (queue_end(&__object->memq, __qe)) {
+ __qe = queue_prev(&page->listq);
+ if (queue_end(&__object->memq, __qe)) {
+ __qe = NULL;
+ }
+ }
+ __new_hint = (vm_page_t) __qe;
+ __object->memq_hint = __new_hint;
+ }
+ queue_remove(&__object->memq, page, vm_page_t, listq);
+}
+
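+/*
+ * vm_page_enqueue_inactive:
+ *
+ * Place a pageable page on the appropriate inactive queue: the anonymous
+ * queue for pages of internal objects, the inactive queue otherwise.  'first'
+ * selects insertion at the head rather than the tail.  Updates the inactive,
+ * anonymous and pageable counters and bumps token_new_pagecount for the
+ * purgeable token accounting.
+ */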
+void
+vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
+{
+ VM_PAGE_QUEUES_ASSERT(mem, 0);
+ assert(!mem->fictitious);
+ assert(!mem->laundry);
+ assert(!mem->pageout_queue);
+ vm_page_check_pageable_safe(mem);
+ if (mem->object->internal) {
+ if (first == TRUE)
+ queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
+ else
+ queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
+ vm_page_anonymous_count++;
+ vm_page_pageable_internal_count++;
+ } else {
+ if (first == TRUE)
+ queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
+ else
+ queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
+ vm_page_pageable_external_count++;
+ }
+ mem->inactive = TRUE;
+ vm_page_inactive_count++;
+ token_new_pagecount++;
+}
+
+/*
+ * Pages from special kernel objects shouldn't
+ * be placed on pageable queues.
+ */
+void
+vm_page_check_pageable_safe(vm_page_t page)
+{
+ if (page->object == kernel_object) {
+ panic("vm_page_check_pageable_safe: trying to add page" \
+ "from kernel object (%p) to pageable queue", kernel_object);
+ }
+
+ if (page->object == compressor_object) {
+ panic("vm_page_check_pageable_safe: trying to add page" \
+ "from compressor object (%p) to pageable queue", compressor_object);
+ }
+
+ if (page->object == vm_submap_object) {
+ panic("vm_page_check_pageable_safe: trying to add page" \
+ "from submap object (%p) to pageable queue", vm_submap_object);
+ }
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * wired page diagnose
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <libkern/OSKextLibPrivate.h>
+
+vm_allocation_site_t *
+vm_allocation_sites[VM_KERN_MEMORY_COUNT];
+
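+/*
+ * vm_tag_bt:
+ *
+ * Walk the current thread's kernel stack via saved frame pointers and return
+ * the allocation tag registered for the first return address that falls
+ * outside the kernel's own text range (typically a kext caller), or
+ * VM_KERN_MEMORY_NONE if no such site is found.
+ */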
+vm_tag_t
+vm_tag_bt(void)
+{
+ uintptr_t* frameptr;
+ uintptr_t* frameptr_next;
+ uintptr_t retaddr;
+ uintptr_t kstackb, kstackt;
+ const vm_allocation_site_t * site;
+ thread_t cthread;
+
+ cthread = current_thread();
+ if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
+
+ kstackb = cthread->kernel_stack;
+ kstackt = kstackb + kernel_stack_size;
+
+ /* Load stack frame pointer (EBP on x86) into frameptr */
+ frameptr = __builtin_frame_address(0);
+ site = NULL;
+ while (frameptr != NULL)
+ {
+ /* Verify thread stack bounds */
+ if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
+
+ /* Next frame pointer is pointed to by the previous one */
+ frameptr_next = (uintptr_t*) *frameptr;
+
+ /* Pull return address from one spot above the frame pointer */
+ retaddr = *(frameptr + 1);
+
+ if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
+ {
+ site = OSKextGetAllocationSiteForCaller(retaddr);
+ break;
+ }
+
+ frameptr = frameptr_next;
+ }
+ return (site ? site->tag : VM_KERN_MEMORY_NONE);
+}
+
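+/*
+ * Bitmap of available dynamic VM tags, 64 tags per word.  Bit 63 of each word
+ * corresponds to the lowest-numbered tag covered by that word; a set bit
+ * means the tag is free.
+ */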
+static uint64_t free_tag_bits[256/64];
+
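+/*
+ * vm_tag_alloc_locked:
+ *
+ * With vm_allocation_sites_lock held, assign the lowest free dynamic tag to
+ * 'site' and record the site in vm_allocation_sites[].  If every dynamic tag
+ * is in use, the site falls back to the shared VM_KERN_MEMORY_ANY tag.
+ */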
+void
+vm_tag_alloc_locked(vm_allocation_site_t * site)
+{
+ vm_tag_t tag;
+ uint64_t avail;
+ uint64_t idx;
+
+ if (site->tag) return;
+
+ idx = 0;
+ while (TRUE)
+ {
+ avail = free_tag_bits[idx];
+ if (avail)
+ {
+ tag = __builtin_clzll(avail);
+ avail &= ~(1ULL << (63 - tag));
+ free_tag_bits[idx] = avail;
+ tag += (idx << 6);
+ break;
+ }
+ idx++;
+ if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
+ {
+ tag = VM_KERN_MEMORY_ANY;
+ break;
+ }
+ }
+ site->tag = tag;
+ if (VM_KERN_MEMORY_ANY != tag)
+ {
+ assert(!vm_allocation_sites[tag]);
+ vm_allocation_sites[tag] = site;
+ }
+}
+
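+/*
+ * vm_tag_free_locked:
+ *
+ * Return a dynamic tag to the free bitmap.  The shared VM_KERN_MEMORY_ANY
+ * tag is never freed.
+ */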
+static void
+vm_tag_free_locked(vm_tag_t tag)
+{
+ uint64_t avail;
+ uint32_t idx;
+ uint64_t bit;
+
+ if (VM_KERN_MEMORY_ANY == tag) return;
+
+ idx = (tag >> 6);
+ avail = free_tag_bits[idx];
+ tag &= 63;
+ bit = (1ULL << (63 - tag));
+ assert(!(avail & bit));
+ free_tag_bits[idx] = (avail | bit);
+}
+
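+/*
+ * vm_tag_init:
+ *
+ * Mark every tag in [VM_KERN_MEMORY_FIRST_DYNAMIC, VM_KERN_MEMORY_ANY) as
+ * free; lower-numbered tags are statically reserved.
+ */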
+static void
+vm_tag_init(void)
+{
+ vm_tag_t tag;
+ for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
+ {
+ vm_tag_free_locked(tag);
+ }
+}
+
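+/*
+ * vm_tag_alloc:
+ *
+ * Resolve the tag for an allocation site.  Sites flagged VM_TAG_BT are first
+ * attributed by backtrace via vm_tag_bt(); otherwise (or if the backtrace
+ * yields no tag) a dynamic tag is allocated for the site under
+ * vm_allocation_sites_lock.
+ */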
+vm_tag_t
+vm_tag_alloc(vm_allocation_site_t * site)
+{
+ vm_tag_t tag;
+
+ if (VM_TAG_BT & site->flags)
+ {
+ tag = vm_tag_bt();
+ if (VM_KERN_MEMORY_NONE != tag) return (tag);
+ }
+
+ if (!site->tag)
+ {
+ lck_spin_lock(&vm_allocation_sites_lock);
+ vm_tag_alloc_locked(site);
+ lck_spin_unlock(&vm_allocation_sites_lock);
+ }
+
+ return (site->tag);
+}
+
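+/*
+ * vm_page_count_object:
+ *
+ * Accumulate an object's wired page count into the entry for its wire_tag.
+ * kernel_object pages are skipped here; vm_page_diagnose() charges them to
+ * the owning map entry's alias instead.
+ */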
+static void
+vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
+{
+ if (!object->wired_page_count) return;
+ if (object != kernel_object)
+ {
+ assert(object->wire_tag < num_sites);
+ sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
+ }
+}
+
+typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
+ unsigned int num_sites, vm_object_t object);
+
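+/*
+ * vm_page_iterate_purgeable_objects:
+ *
+ * Apply 'proc' to every object on one group of a volatile purgeable queue.
+ */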
+static void
+vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
+ vm_page_iterate_proc proc, purgeable_q_t queue,
+ int group)
+{
+ vm_object_t object;
+
+ for (object = (vm_object_t) queue_first(&queue->objq[group]);
+ !queue_end(&queue->objq[group], (queue_entry_t) object);
+ object = (vm_object_t) queue_next(&object->objq))
+ {
+ proc(sites, num_sites, object);
+ }
+}
+
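+/*
+ * vm_page_iterate_objects:
+ *
+ * Apply 'proc' to every object that may hold wired pages: the wired-object
+ * list, the purgeable nonvolatile queue, and all volatile purgeable queues
+ * (obsolete, FIFO and LIFO groups), taking the appropriate lock for each.
+ */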
+static void
+vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
+ vm_page_iterate_proc proc)
+{
+ purgeable_q_t volatile_q;
+ queue_head_t * nonvolatile_q;
+ vm_object_t object;
+ int group;
+
+ lck_spin_lock(&vm_objects_wired_lock);
+ queue_iterate(&vm_objects_wired,
+ object,
+ vm_object_t,
+ objq)
+ {
+ proc(sites, num_sites, object);
+ }
+ lck_spin_unlock(&vm_objects_wired_lock);
+
+ lck_mtx_lock(&vm_purgeable_queue_lock);
+ nonvolatile_q = &purgeable_nonvolatile_queue;
+ for (object = (vm_object_t) queue_first(nonvolatile_q);
+ !queue_end(nonvolatile_q, (queue_entry_t) object);
+ object = (vm_object_t) queue_next(&object->objq))
+ {
+ proc(sites, num_sites, object);
+ }
+
+ volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
+ vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
+
+ volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
+ for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
+ {
+ vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
+ }
+
+ volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
+ for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
+ {
+ vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
+ }
+ lck_mtx_unlock(&vm_purgeable_queue_lock);
+}
+
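+/*
+ * process_account:
+ *
+ * Post-process the per-tag totals gathered into 'sites': label the static
+ * tags, attach site/kmod identifiers to dynamic tags that still have wired
+ * memory, and release dynamic tags whose accounted size has dropped to zero
+ * (freeing the site itself if it was flagged VM_TAG_UNLOAD).  Returns the
+ * sum of all per-tag sizes.
+ */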
+static uint64_t
+process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
+{
+ uint64_t found;
+ unsigned int idx;
+ vm_allocation_site_t * site;
+
+ assert(num_sites >= VM_KERN_MEMORY_COUNT);
+ found = 0;
+ for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
+ {
+ found += sites[idx].size;
+ if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
+ {
+ sites[idx].site = idx;
+ sites[idx].flags |= VM_KERN_SITE_TAG;
+ if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
+ else sites[idx].flags |= VM_KERN_SITE_WIRED;
+ continue;
+ }
+ lck_spin_lock(&vm_allocation_sites_lock);
+ if ((site = vm_allocation_sites[idx]))
+ {
+ if (sites[idx].size)
+ {
+ sites[idx].flags |= VM_KERN_SITE_WIRED;
+ if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
+ {
+ sites[idx].site = OSKextGetKmodIDForSite(site);
+ sites[idx].flags |= VM_KERN_SITE_KMOD;
+ }
+ else
+ {
+ sites[idx].site = VM_KERNEL_UNSLIDE(site);
+ sites[idx].flags |= VM_KERN_SITE_KERNEL;
+ }
+ site = NULL;
+ }
+ else
+ {
+ vm_tag_free_locked(site->tag);
+ site->tag = VM_KERN_MEMORY_NONE;
+ vm_allocation_sites[idx] = NULL;
+ if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
+ }
+ }
+ lck_spin_unlock(&vm_allocation_sites_lock);
+ if (site) OSKextFreeSite(site);
+ }
+ return (found);
+}
+
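+/*
+ * vm_page_diagnose:
+ *
+ * Fill 'sites' with a snapshot of wired memory usage: per-tag totals from
+ * the wired and purgeable object lists, global page counters, kernel map
+ * statistics, and kernel_object pages charged to the owning map entry's
+ * alias.  The map walk descends one level into submaps (kMaxKernelDepth);
+ * its inner for loop tests 'map' rather than an end condition because the
+ * submap push/pop resets both 'map' and 'entry' from inside the loop.
+ */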
+kern_return_t
+vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
+{
+ enum { kMaxKernelDepth = 1 };
+ vm_map_t maps [kMaxKernelDepth];
+ vm_map_entry_t entries[kMaxKernelDepth];
+ vm_map_t map;
+ vm_map_entry_t entry;
+ vm_object_offset_t offset;
+ vm_page_t page;
+ int stackIdx, count;
+ uint64_t wired_size;
+ uint64_t wired_managed_size;
+ uint64_t wired_reserved_size;
+ mach_memory_info_t * counts;
+
+ bzero(sites, num_sites * sizeof(mach_memory_info_t));
+
+ vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
+
+ wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
+ wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
+ wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
+
+ assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
+ counts = &sites[VM_KERN_MEMORY_COUNT];
+
+#define SET_COUNT(xcount, xsize, xflags) \
+ counts[xcount].site = (xcount); \
+ counts[xcount].size = (xsize); \
+ counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
+
+ SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
+ SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
+ SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
+ SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
+ SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
+ SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
+
+#define SET_MAP(xcount, xsize, xfree, xlargest) \
+ counts[xcount].site = (xcount); \
+ counts[xcount].size = (xsize); \
+ counts[xcount].free = (xfree); \
+ counts[xcount].largest = (xlargest); \
+ counts[xcount].flags = VM_KERN_SITE_COUNTER;
+
+ vm_map_size_t map_size, map_free, map_largest;
+
+ vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
+ SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
+
+ vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
+ SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
+
+ vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
+ SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
+
+ map = kernel_map;
+ stackIdx = 0;
+ while (map)
+ {
+ vm_map_lock(map);
+ for (entry = map->hdr.links.next; map; entry = entry->links.next)
+ {
+ if (entry->is_sub_map)
+ {
+ assert(stackIdx < kMaxKernelDepth);
+ maps[stackIdx] = map;
+ entries[stackIdx] = entry;
+ stackIdx++;
+ map = VME_SUBMAP(entry);
+ entry = NULL;
+ break;
+ }
+ if (VME_OBJECT(entry) == kernel_object)
+ {
+ count = 0;
+ vm_object_lock(VME_OBJECT(entry));
+ for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
+ {
+ page = vm_page_lookup(VME_OBJECT(entry), offset);
+ if (page && VM_PAGE_WIRED(page)) count++;
+ }
+ vm_object_unlock(VME_OBJECT(entry));
+
+ if (count)
+ {
+ assert(VME_ALIAS(entry) < num_sites);
+ sites[VME_ALIAS(entry)].size += ptoa_64(count);
+ }
+ }
+ if (entry == vm_map_last_entry(map))
+ {
+ vm_map_unlock(map);
+ if (!stackIdx) map = NULL;
+ else
+ {
+ --stackIdx;
+ map = maps[stackIdx];
+ entry = entries[stackIdx];
+ }
+ }
+ }
+ }
+
+ process_account(sites, num_sites);
+
+ return (KERN_SUCCESS);
+}