/*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
* Resident memory management module.
*/
+#include <debug.h>
+#include <libkern/OSAtomic.h>
+
#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
+#include <mach/sdt.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <ppc/mappings.h> /* (BRINGUP) */
#include <pexpert/pexpert.h> /* (BRINGUP) */
+#include <vm/vm_protos.h>
+#include <vm/memory_object.h>
+#include <vm/vm_purgeable_internal.h>
+
+#if CONFIG_EMBEDDED
+#include <sys/kern_memorystatus.h>
+#endif
+
+int speculative_age_index = 0;
+int speculative_steal_index = 0;
+
+struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
+
+static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
-/* Variables used to indicate the relative age of pages in the
- * inactive list
- */
-int vm_page_ticket_roll = 0;
-int vm_page_ticket = 0;
/*
* Associated with page of user-allocatable memory is a
* page structure.
unsigned int vm_page_bucket_count = 0; /* How big is array? */
unsigned int vm_page_hash_mask; /* Mask for hash function */
unsigned int vm_page_hash_shift; /* Shift for hash function */
-uint32_t vm_page_bucket_hash; /* Basic bucket hash */
+uint32_t vm_page_bucket_hash; /* Basic bucket hash */
decl_simple_lock_data(,vm_page_bucket_lock)
+
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug. It is intended to be called by
* hand by a developer using a kernel debugger. This routine prints
* module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
* constants.
*/
-#ifndef PAGE_SIZE_FIXED
-vm_size_t page_size = 4096;
-vm_size_t page_mask = 4095;
-int page_shift = 12;
-#else
vm_size_t page_size = PAGE_SIZE;
vm_size_t page_mask = PAGE_MASK;
int page_shift = PAGE_SHIFT;
-#endif /* PAGE_SIZE_FIXED */
/*
* Resident page structures are initialized from
*/
struct vm_page vm_page_template;
+vm_page_t vm_pages = VM_PAGE_NULL;
+unsigned int vm_pages_count = 0;
+
/*
* Resident pages that represent real memory
- * are allocated from a free list.
+ * are allocated from a set of free lists,
+ * one per color.
*/
-vm_page_t vm_page_queue_free;
+unsigned int vm_colors;
+unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
+unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
+queue_head_t vm_page_queue_free[MAX_COLORS];
vm_page_t vm_page_queue_fictitious;
-decl_mutex_data(,vm_page_queue_free_lock)
unsigned int vm_page_free_wanted;
-int vm_page_free_count;
-int vm_page_fictitious_count;
+unsigned int vm_page_free_wanted_privileged;
+unsigned int vm_page_free_count;
+unsigned int vm_page_fictitious_count;
unsigned int vm_page_free_count_minimum; /* debugging */
zone_t vm_page_zone;
decl_mutex_data(,vm_page_alloc_lock)
unsigned int io_throttle_zero_fill;
-decl_mutex_data(,vm_page_zero_fill_lock)
/*
* Fictitious pages don't have a physical address,
*/
vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
+/*
+ * Guard pages are not accessible so they don't
+ * need a physical address, but we need to enter
+ * one in the pmap.
+ * Let's make it recognizable and make sure that
+ * we don't use a real physical page with that
+ * physical address.
+ */
+vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
+
/*
* Resident page structures are also chained on
* queues that are used by the page replacement
*/
queue_head_t vm_page_queue_active;
queue_head_t vm_page_queue_inactive;
-queue_head_t vm_page_queue_zf;
-decl_mutex_data(,vm_page_queue_lock)
-int vm_page_active_count;
-int vm_page_inactive_count;
-int vm_page_wire_count;
-int vm_page_gobble_count = 0;
-int vm_page_wire_count_warning = 0;
-int vm_page_gobble_count_warning = 0;
-
-/* the following fields are protected by the vm_page_queue_lock */
-queue_head_t vm_page_queue_limbo;
-int vm_page_limbo_count = 0; /* total pages in limbo */
-int vm_page_limbo_real_count = 0; /* real pages in limbo */
-int vm_page_pin_count = 0; /* number of pinned pages */
-
-decl_simple_lock_data(,vm_page_preppin_lock)
+queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
+
+unsigned int vm_page_active_count;
+unsigned int vm_page_inactive_count;
+unsigned int vm_page_throttled_count;
+unsigned int vm_page_speculative_count;
+unsigned int vm_page_wire_count;
+unsigned int vm_page_gobble_count = 0;
+unsigned int vm_page_wire_count_warning = 0;
+unsigned int vm_page_gobble_count_warning = 0;
+
+unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
+uint64_t vm_page_purged_count = 0; /* total count of purged pages */
+
+unsigned int vm_page_speculative_recreated = 0;
+unsigned int vm_page_speculative_created = 0;
+unsigned int vm_page_speculative_used = 0;
+
+ppnum_t vm_lopage_poolstart = 0;
+ppnum_t vm_lopage_poolend = 0;
+int vm_lopage_poolsize = 0;
+uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
+
/*
* Several page replacement parameters are also
* (done here in vm_page_alloc) can trigger the
* pageout daemon.
*/
-int vm_page_free_target = 0;
-int vm_page_free_min = 0;
-int vm_page_inactive_target = 0;
-int vm_page_free_reserved = 0;
-int vm_page_laundry_count = 0;
-int vm_page_burst_count = 0;
-int vm_page_throttled_count = 0;
+unsigned int vm_page_free_target = 0;
+unsigned int vm_page_free_min = 0;
+unsigned int vm_page_inactive_target = 0;
+unsigned int vm_page_inactive_min = 0;
+unsigned int vm_page_free_reserved = 0;
+unsigned int vm_page_zfill_throttle_count = 0;
/*
* The VM system has a couple of heuristics for deciding
void
vm_set_page_size(void)
{
-#ifndef PAGE_SIZE_FIXED
page_mask = page_size - 1;
if ((page_mask & page_size) != 0)
panic("vm_set_page_size: page size not a power of two");
for (page_shift = 0; ; page_shift++)
- if ((1 << page_shift) == page_size)
+ if ((1U << page_shift) == page_size) /* unsigned constant replaces the signed "1" of the removed line */
break;
-#endif /* PAGE_SIZE_FIXED */
}
+
+/*
+ * vm_page_set_colors:
+ *
+ * Called once during startup, once the cache geometry is known.
+ *
+ * Chooses the number of page colors, in order of preference:
+ *   1. the "colors" boot-arg, when PE_parse_boot_arg() reports one;
+ *   2. vm_cache_geometry_colors, when it is non-zero;
+ *   3. DEFAULT_COLORS.
+ * The value is then forced into the range [1, MAX_COLORS].  A result
+ * that is not a power of two causes a panic.  Otherwise vm_colors is
+ * set to the count and vm_color_mask to count - 1.
+ */
+static void
+vm_page_set_colors( void )
+{
+ unsigned int n, override;
+
+ if ( PE_parse_boot_arg("colors", &override) ) /* colors specified as a boot-arg? */
+ n = override;
+ else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
+ n = vm_cache_geometry_colors;
+ else n = DEFAULT_COLORS; /* use default if all else fails */
+
+ if ( n == 0 ) /* a zero count is bumped to one color */
+ n = 1;
+ if ( n > MAX_COLORS ) /* clamp to the upper bound MAX_COLORS */
+ n = MAX_COLORS;
+
+ /* the count must be a power of 2 */
+ if ( ( n & (n - 1)) !=0 )
+ panic("vm_page_set_colors");
+
+ vm_colors = n;
+ vm_color_mask = n - 1; /* n is a power of 2 here, so n - 1 has all lower bits set */
+}
+
+
/*
* vm_page_bootstrap:
*
vm_offset_t *endp)
{
register vm_page_t m;
- int i;
+ unsigned int i;
unsigned int log1;
unsigned int log2;
unsigned int size;
*/
m = &vm_page_template;
- m->object = VM_OBJECT_NULL; /* reset later */
- m->offset = 0; /* reset later */
+ m->object = VM_OBJECT_NULL; /* reset later */
+ m->offset = (vm_object_offset_t) -1; /* reset later */
m->wire_count = 0;
+ m->pageq.next = NULL;
+ m->pageq.prev = NULL;
+ m->listq.next = NULL;
+ m->listq.prev = NULL;
+
+ m->speculative = FALSE;
+ m->throttled = FALSE;
m->inactive = FALSE;
m->active = FALSE;
+ m->no_cache = FALSE;
m->laundry = FALSE;
m->free = FALSE;
- m->no_isync = TRUE;
+ m->pmapped = FALSE;
m->reference = FALSE;
m->pageout = FALSE;
m->dump_cleaning = FALSE;
m->cleaning = FALSE;
m->precious = FALSE;
m->clustered = FALSE;
- m->lock_supplied = FALSE;
m->unusual = FALSE;
m->restart = FALSE;
m->zero_fill = FALSE;
+ m->encrypted = FALSE;
+ m->encrypted_cleaning = FALSE;
+ m->deactivated = FALSE;
m->phys_page = 0; /* reset later */
- m->page_lock = VM_PROT_NONE;
- m->unlock_request = VM_PROT_NONE;
- m->page_error = KERN_SUCCESS;
-
/*
* Initialize the page queues.
*/
- mutex_init(&vm_page_queue_free_lock, ETAP_VM_PAGEQ_FREE);
- mutex_init(&vm_page_queue_lock, ETAP_VM_PAGEQ);
- simple_lock_init(&vm_page_preppin_lock, ETAP_VM_PREPPIN);
+ mutex_init(&vm_page_queue_free_lock, 0);
+ mutex_init(&vm_page_queue_lock, 0);
+
+ mutex_init(&vm_purgeable_queue_lock, 0);
+
+ for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
+ int group;
+
+ purgeable_queues[i].token_q_head = 0;
+ purgeable_queues[i].token_q_tail = 0;
+ for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
+ queue_init(&purgeable_queues[i].objq[group]);
- vm_page_queue_free = VM_PAGE_NULL;
+ purgeable_queues[i].type = i;
+ purgeable_queues[i].new_pages = 0;
+#if MACH_ASSERT
+ purgeable_queues[i].debug_count_tokens = 0;
+ purgeable_queues[i].debug_count_objects = 0;
+#endif
+ };
+
+ for (i = 0; i < MAX_COLORS; i++ )
+ queue_init(&vm_page_queue_free[i]);
+ queue_init(&vm_lopage_queue_free);
vm_page_queue_fictitious = VM_PAGE_NULL;
queue_init(&vm_page_queue_active);
queue_init(&vm_page_queue_inactive);
+ queue_init(&vm_page_queue_throttled);
queue_init(&vm_page_queue_zf);
- queue_init(&vm_page_queue_limbo);
+ for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
+ queue_init(&vm_page_queue_speculative[i].age_q);
+
+ vm_page_queue_speculative[i].age_ts.tv_sec = 0;
+ vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
+ }
vm_page_free_wanted = 0;
+ vm_page_free_wanted_privileged = 0;
+
+ vm_page_set_colors();
+
/*
* Steal memory for the map and zone subsystems.
* than the number of physical pages in the system.
*/
- simple_lock_init(&vm_page_bucket_lock, ETAP_VM_BUCKET);
+ simple_lock_init(&vm_page_bucket_lock, 0);
if (vm_page_bucket_count == 0) {
unsigned int npages = pmap_free_pages();
*/
pmap_startup(&virtual_space_start, &virtual_space_end);
- virtual_space_start = round_page_32(virtual_space_start);
- virtual_space_end = trunc_page_32(virtual_space_end);
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
*startp = virtual_space_start;
*endp = virtual_space_end;
* all VM managed pages are "free", courtesy of pmap_startup.
*/
vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
-
- printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
vm_page_free_count_minimum = vm_page_free_count;
+
+ printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
+ vm_page_free_count, vm_page_wire_count);
+
+ simple_lock_init(&vm_paging_lock, 0);
}
#ifndef MACHINE_PAGES
* of two simpler functions, pmap_virtual_space and pmap_next_page.
*/
-vm_offset_t
+void *
pmap_steal_memory(
vm_size_t size)
{
* we don't trust the pmap module to do it right.
*/
- virtual_space_start = round_page_32(virtual_space_start);
- virtual_space_end = trunc_page_32(virtual_space_end);
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
}
/*
* Allocate and map physical pages to back new virtual pages.
*/
- for (vaddr = round_page_32(addr);
+ for (vaddr = round_page(addr);
vaddr < addr + size;
vaddr += PAGE_SIZE) {
if (!pmap_next_page(&phys_page))
}
- return addr;
+ return (void *) addr;
}
void
vm_offset_t *endp)
{
unsigned int i, npages, pages_initialized, fill, fillval;
- vm_page_t pages;
ppnum_t phys_page;
addr64_t tmpaddr;
+ unsigned int num_of_lopages = 0;
+ unsigned int last_index;
/*
* We calculate how many page frames we will have
tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
- npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
+ npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
- pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
+ vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
/*
* Initialize the page frames.
*/
-
for (i = 0, pages_initialized = 0; i < npages; i++) {
if (!pmap_next_page(&phys_page))
break;
- vm_page_init(&pages[i], phys_page);
+ vm_page_init(&vm_pages[i], phys_page);
vm_page_pages++;
pages_initialized++;
}
+ vm_pages_count = pages_initialized;
+
+ /*
+ * Check if we want to initialize pages to a known value
+ */
+ fill = 0; /* Assume no fill */
+ if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
+
+
+ /*
+ * if vm_lopage_poolsize is non-zero, then we need to reserve
+ * a pool of pages whose addresses are less than 4G... this pool
+ * is used by drivers whose hardware can't DMA beyond 32 bits...
+ *
+ * note that I'm assuming that the page list is ascending and
+ * ordered w/r to the physical address
+ */
+ for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
+ vm_page_t m;
+
+ m = &vm_pages[i];
+
+ if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
+ panic("couldn't reserve the lopage pool: not enough lo pages\n");
+
+ if (m->phys_page < vm_lopage_poolend)
+ panic("couldn't reserve the lopage pool: page list out of order\n");
+
+ vm_lopage_poolend = m->phys_page;
+
+ if (vm_lopage_poolstart == 0)
+ vm_lopage_poolstart = m->phys_page;
+ else {
+ if (m->phys_page < vm_lopage_poolstart)
+ panic("couldn't reserve the lopage pool: page list out of order\n");
+ }
+
+ if (fill)
+ fillPage(m->phys_page, fillval); /* Fill the page with a know value if requested at boot */
+
+ vm_page_release(m);
+ }
+ last_index = i;
+
+ // -debug code remove
+ if (2 == vm_himemory_mode) {
+ // free low -> high so high is preferred
+ for (i = last_index + 1; i <= pages_initialized; i++) {
+ if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
+ vm_page_release(&vm_pages[i - 1]);
+ }
+ }
+ else
+ // debug code remove-
/*
* Release pages in reverse order so that physical pages
* the devices (which must address physical memory) happy if
* they require several consecutive pages.
*/
-
-/*
- * Check if we want to initialize pages to a known value
- */
-
- fill = 0; /* Assume no fill */
- if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
-
- for (i = pages_initialized; i > 0; i--) {
- extern void fillPage(ppnum_t phys_page, unsigned int fillval);
- if(fill) fillPage(pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
- vm_page_release(&pages[i - 1]);
+ for (i = pages_initialized; i > last_index; i--) {
+ if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
+ vm_page_release(&vm_pages[i - 1]);
}
#if 0
{
vm_page_t xx, xxo, xxl;
- int j, k, l;
+ int i, j, k, l;
j = 0; /* (BRINGUP) */
xxl = 0;
- for(xx = vm_page_queue_free; xx; xxl = xx, xx = xx->pageq.next) { /* (BRINGUP) */
- j++; /* (BRINGUP) */
- if(j > vm_page_free_count) { /* (BRINGUP) */
- panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
- }
-
- l = vm_page_free_count - j; /* (BRINGUP) */
- k = 0; /* (BRINGUP) */
-
- if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
-
- for(xxo = xx->pageq.next; xxo; xxo = xxo->pageq.next) { /* (BRINGUP) */
- k++;
- if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
- if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
- panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
+ for( i = 0; i < vm_colors; i++ ) {
+ queue_iterate(&vm_page_queue_free[i],
+ xx,
+ vm_page_t,
+ pageq) { /* BRINGUP */
+ j++; /* (BRINGUP) */
+ if(j > vm_page_free_count) { /* (BRINGUP) */
+ panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
+ }
+
+ l = vm_page_free_count - j; /* (BRINGUP) */
+ k = 0; /* (BRINGUP) */
+
+ if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
+
+ for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
+ k++;
+ if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
+ if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
+ panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
+ }
}
+
+ xxl = xx;
}
}
vm_page_zone->count += vm_page_pages;
vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
- mutex_init(&vm_page_alloc_lock, ETAP_VM_PAGE_ALLOC);
- mutex_init(&vm_page_zero_fill_lock, ETAP_VM_PAGE_ALLOC);
+ mutex_init(&vm_page_alloc_lock, 0);
}
/*
( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
& vm_page_hash_mask)
+
/*
* vm_page_insert: [ internal use only ]
*
*
* The object must be locked.
*/
-
void
vm_page_insert(
- register vm_page_t mem,
- register vm_object_t object,
- register vm_object_offset_t offset)
+ vm_page_t mem,
+ vm_object_t object,
+ vm_object_offset_t offset)
+{ /*
+ * Thin wrapper: forwards to vm_page_insert_internal() with
+ * queues_lock_held == FALSE, so the internal routine takes and
+ * drops the page queues lock itself when it has to adjust
+ * vm_page_purgeable_count.
+ */
+ vm_page_insert_internal(mem, object, offset, FALSE);
+}
+
+
+static void
+vm_page_insert_internal(
+ vm_page_t mem,
+ vm_object_t object,
+ vm_object_offset_t offset,
+ boolean_t queues_lock_held)
{
register vm_page_bucket_t *bucket;
VM_PAGE_CHECK(mem);
- if (mem->tabled)
- panic("vm_page_insert");
+ if (object == vm_submap_object) {
+ /* the vm_submap_object is only a placeholder for submaps */
+ panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
+ }
+ vm_object_lock_assert_exclusive(object);
+#if DEBUG
+ if (mem->tabled || mem->object != VM_OBJECT_NULL)
+ panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
+ "already in (obj=%p,off=0x%llx)",
+ mem, object, offset, mem->object, mem->offset);
+#endif
assert(!object->internal || offset < object->size);
/* only insert "pageout" pages into "pageout" objects,
* and normal pages into normal objects */
assert(object->pageout == mem->pageout);
+ assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
+
/*
* Record the object/offset pair in this page
*/
* Now link into the object's list of backed pages.
*/
- queue_enter(&object->memq, mem, vm_page_t, listq);
+ VM_PAGE_INSERT(mem, object);
mem->tabled = TRUE;
/*
*/
object->resident_page_count++;
+
+ if (object->purgable == VM_PURGABLE_VOLATILE ||
+ object->purgable == VM_PURGABLE_EMPTY) {
+ if (queues_lock_held == FALSE)
+ vm_page_lockspin_queues();
+
+ vm_page_purgeable_count++;
+
+ if (queues_lock_held == FALSE)
+ vm_page_unlock_queues();
+ }
}
/*
register vm_object_t object,
register vm_object_offset_t offset)
{
- register vm_page_bucket_t *bucket;
+ vm_page_bucket_t *bucket;
+ vm_page_t found_m = VM_PAGE_NULL;
VM_PAGE_CHECK(mem);
-
- if (mem->tabled)
- panic("vm_page_replace");
-
+ vm_object_lock_assert_exclusive(object);
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+
+ if (mem->tabled || mem->object != VM_OBJECT_NULL)
+ panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
+ "already in (obj=%p,off=0x%llx)",
+ mem, object, offset, mem->object, mem->offset);
+#endif
/*
* Record the object/offset pair in this page
*/
bucket = &vm_page_buckets[vm_page_hash(object, offset)];
simple_lock(&vm_page_bucket_lock);
+
if (bucket->pages) {
vm_page_t *mp = &bucket->pages;
register vm_page_t m = *mp;
+
do {
if (m->object == object && m->offset == offset) {
/*
- * Remove page from bucket and from object,
- * and return it to the free list.
+ * Remove old page from hash list
*/
*mp = m->next;
- queue_remove(&object->memq, m, vm_page_t,
- listq);
- m->tabled = FALSE;
- object->resident_page_count--;
-
- /*
- * Return page to the free list.
- * Note the page is not tabled now, so this
- * won't self-deadlock on the bucket lock.
- */
- vm_page_free(m);
+ found_m = m;
break;
}
mp = &m->next;
- } while (m = *mp);
+ } while ((m = *mp));
+
mem->next = bucket->pages;
} else {
mem->next = VM_PAGE_NULL;
}
+ /*
+ * insert new page at head of hash list
+ */
bucket->pages = mem;
+
simple_unlock(&vm_page_bucket_lock);
+ if (found_m) {
+ /*
+ * there was already a page at the specified
+ * offset for this object... remove it from
+ * the object and free it back to the free list
+ */
+ VM_PAGE_REMOVE(found_m);
+ found_m->tabled = FALSE;
+
+ found_m->object = VM_OBJECT_NULL;
+ found_m->offset = (vm_object_offset_t) -1;
+ object->resident_page_count--;
+
+ if (object->purgable == VM_PURGABLE_VOLATILE ||
+ object->purgable == VM_PURGABLE_EMPTY) {
+ assert(vm_page_purgeable_count > 0);
+ vm_page_purgeable_count--;
+ }
+
+ /*
+ * Return page to the free list.
+ * Note the page is not tabled now
+ */
+ vm_page_free(found_m);
+ }
/*
* Now link into the object's list of backed pages.
*/
- queue_enter(&object->memq, mem, vm_page_t, listq);
+ VM_PAGE_INSERT(mem, object);
mem->tabled = TRUE;
/*
*/
object->resident_page_count++;
+
+ if (object->purgable == VM_PURGABLE_VOLATILE ||
+ object->purgable == VM_PURGABLE_EMPTY) {
+ vm_page_purgeable_count++;
+ }
}
/*
* Removes the given mem entry from the object/offset-page
* table and the object page list.
*
- * The object and page must be locked.
+ * The object and page queues must be locked.
*/
void
"vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
(integer_t)mem->object, (integer_t)mem->offset,
(integer_t)mem, 0,0);
-
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
+ vm_object_lock_assert_exclusive(mem->object);
assert(mem->tabled);
assert(!mem->cleaning);
VM_PAGE_CHECK(mem);
+
/*
* Remove from the object_object/offset hash table
*/
* Now remove from the object's list of backed pages.
*/
- queue_remove(&mem->object->memq, mem, vm_page_t, listq);
+ VM_PAGE_REMOVE(mem);
/*
* And show that the object has one fewer resident
mem->object->resident_page_count--;
+ if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
+ mem->object->purgable == VM_PURGABLE_EMPTY) {
+ assert(vm_page_purgeable_count > 0);
+ vm_page_purgeable_count--;
+ }
mem->tabled = FALSE;
mem->object = VM_OBJECT_NULL;
- mem->offset = 0;
+ mem->offset = (vm_object_offset_t) -1;
}
/*
* The object must be locked. No side effects.
*/
+unsigned long vm_page_lookup_hint = 0;
+unsigned long vm_page_lookup_hint_next = 0;
+unsigned long vm_page_lookup_hint_prev = 0;
+unsigned long vm_page_lookup_hint_miss = 0;
+unsigned long vm_page_lookup_bucket_NULL = 0;
+unsigned long vm_page_lookup_miss = 0;
+
+
vm_page_t
vm_page_lookup(
register vm_object_t object,
{
register vm_page_t mem;
register vm_page_bucket_t *bucket;
+ queue_entry_t qe;
+
+ vm_object_lock_assert_held(object);
+ mem = object->memq_hint;
+
+ if (mem != VM_PAGE_NULL) {
+ assert(mem->object == object);
+
+ if (mem->offset == offset) {
+ vm_page_lookup_hint++;
+ return mem;
+ }
+ qe = queue_next(&mem->listq);
+
+ if (! queue_end(&object->memq, qe)) {
+ vm_page_t next_page;
+
+ next_page = (vm_page_t) qe;
+ assert(next_page->object == object);
+
+ if (next_page->offset == offset) {
+ vm_page_lookup_hint_next++;
+ object->memq_hint = next_page; /* new hint */
+ return next_page;
+ }
+ }
+ qe = queue_prev(&mem->listq);
+
+ if (! queue_end(&object->memq, qe)) {
+ vm_page_t prev_page;
+
+ prev_page = (vm_page_t) qe;
+ assert(prev_page->object == object);
+ if (prev_page->offset == offset) {
+ vm_page_lookup_hint_prev++;
+ object->memq_hint = prev_page; /* new hint */
+ return prev_page;
+ }
+ }
+ }
/*
- * Search the hash table for this object/offset pair
+ * Search the hash table for this object/offset pair
*/
-
bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ /*
+ * since we hold the object lock, we are guaranteed that no
+ * new pages can be inserted into this object... this in turn
+ * guarantees that the page we're looking for can't exist
+ * if the bucket it hashes to is currently NULL even when looked
+ * at outside the scope of the hash bucket lock... this is a
+ * really cheap optimization to avoid taking the lock
+ */
+ if (bucket->pages == VM_PAGE_NULL) {
+ vm_page_lookup_bucket_NULL++;
+
+ return (VM_PAGE_NULL);
+ }
simple_lock(&vm_page_bucket_lock);
+
for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
VM_PAGE_CHECK(mem);
if ((mem->object == object) && (mem->offset == offset))
}
simple_unlock(&vm_page_bucket_lock);
+ if (mem != VM_PAGE_NULL) {
+ if (object->memq_hint != VM_PAGE_NULL) {
+ vm_page_lookup_hint_miss++;
+ }
+ assert(mem->object == object);
+ object->memq_hint = mem;
+ } else
+ vm_page_lookup_miss++;
+
return(mem);
}
+
/*
* vm_page_rename:
*
vm_page_rename(
register vm_page_t mem,
register vm_object_t new_object,
- vm_object_offset_t new_offset)
+ vm_object_offset_t new_offset,
+ boolean_t encrypted_ok)
{
assert(mem->object != new_object);
+
+ /*
+ * ENCRYPTED SWAP:
+ * The encryption key is based on the page's memory object
+ * (aka "pager") and paging offset. Moving the page to
+ * another VM object changes its "pager" and "paging_offset"
+ * so it has to be decrypted first, or we would lose the key.
+ *
+ * One exception is VM object collapsing, where we transfer pages
+ * from one backing object to its parent object. This operation also
+ * transfers the paging information, so the <pager,paging_offset> info
+ * should remain consistent. The caller (vm_object_do_collapse())
+ * sets "encrypted_ok" in this case.
+ */
+ if (!encrypted_ok && mem->encrypted) {
+ panic("vm_page_rename: page %p is encrypted\n", mem);
+ }
+
/*
* Changes to mem->object require the page lock because
* the pageout daemon uses that lock to get the object.
(integer_t)new_object, (integer_t)new_offset,
(integer_t)mem, 0,0);
- vm_page_lock_queues();
+ vm_page_lockspin_queues();
vm_page_remove(mem);
vm_page_insert(mem, new_object, new_offset);
vm_page_unlock_queues();
vm_page_t mem,
ppnum_t phys_page)
{
+ assert(phys_page);
*mem = vm_page_template;
mem->phys_page = phys_page;
}
int c_vm_page_release_fictitious = 0;
int c_vm_page_more_fictitious = 0;
+extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
+/*
+ * vm_page_grab_fictitious_common:
+ *
+ * Takes an element from vm_page_zone with zget().  When one is
+ * obtained it is initialized through vm_page_init() with phys_addr
+ * as its physical page value and marked fictitious.
+ *
+ * Returns the page, or the null result of zget() when no element
+ * was available.
+ *
+ * NOTE(review): phys_addr is a vm_offset_t here; confirm that the
+ * implicit conversion to the type vm_page_init() expects is intended.
+ */
vm_page_t
-vm_page_grab_fictitious(void)
+vm_page_grab_fictitious_common(
+ vm_offset_t phys_addr)
{
register vm_page_t m;
m = (vm_page_t)zget(vm_page_zone);
if (m) {
- vm_page_init(m, vm_page_fictitious_addr);
+ vm_page_init(m, phys_addr);
m->fictitious = TRUE;
}
return m;
}
+vm_page_t
+vm_page_grab_fictitious(void)
+{ /* grab a fictitious page whose physical address is vm_page_fictitious_addr */
+ return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
+}
+/*
+ * vm_page_grab_guard:
+ *
+ * Same as vm_page_grab_fictitious(), except that the page is tagged
+ * with vm_page_guard_addr instead of vm_page_fictitious_addr.
+ * Returns whatever vm_page_grab_fictitious_common() returns.
+ */
+vm_page_t
+vm_page_grab_guard(void)
+{
+ return vm_page_grab_fictitious_common(vm_page_guard_addr);
+}
+
/*
* vm_page_release_fictitious:
*
assert(!m->free);
assert(m->busy);
assert(m->fictitious);
- assert(m->phys_page == vm_page_fictitious_addr);
+ assert(m->phys_page == vm_page_fictitious_addr ||
+ m->phys_page == vm_page_guard_addr);
c_vm_page_release_fictitious++;
-
+#if DEBUG
if (m->free)
panic("vm_page_release_fictitious");
+#endif
m->free = TRUE;
- zfree(vm_page_zone, (vm_offset_t)m);
+ zfree(vm_page_zone, m);
}
/*
void vm_page_more_fictitious(void)
{
- extern vm_map_t zone_map;
register vm_page_t m;
vm_offset_t addr;
kern_return_t retval;
return;
}
- if ((retval = kernel_memory_allocate(zone_map,
- &addr, PAGE_SIZE, VM_PROT_ALL,
- KMA_KOBJECT|KMA_NOPAGEWAIT)) != KERN_SUCCESS) {
+ retval = kernel_memory_allocate(zone_map,
+ &addr, PAGE_SIZE, VM_PROT_ALL,
+ KMA_KOBJECT|KMA_NOPAGEWAIT);
+ if (retval != KERN_SUCCESS) {
/*
* No page was available. Tell the pageout daemon, drop the
* lock to give another thread a chance at it, and
m->fictitious = TRUE;
m++;
}
- zcram(vm_page_zone, addr, PAGE_SIZE);
+ zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
mutex_unlock(&vm_page_alloc_lock);
}
-/*
- * vm_page_convert:
- *
- * Attempt to convert a fictitious page into a real page.
- */
-
-boolean_t
-vm_page_convert(
- register vm_page_t m)
-{
- register vm_page_t real_m;
-
- assert(m->busy);
- assert(m->fictitious);
- assert(!m->dirty);
-
- real_m = vm_page_grab();
- if (real_m == VM_PAGE_NULL)
- return FALSE;
-
- m->phys_page = real_m->phys_page;
- m->fictitious = FALSE;
- m->no_isync = TRUE;
-
- vm_page_lock_queues();
- if (m->active)
- vm_page_active_count++;
- else if (m->inactive)
- vm_page_inactive_count++;
- vm_page_unlock_queues();
-
- real_m->phys_page = vm_page_fictitious_addr;
- real_m->fictitious = TRUE;
-
- vm_page_release_fictitious(real_m);
- return TRUE;
-}
/*
* vm_pool_low():
return( vm_page_free_count < vm_page_free_reserved );
}
+
+
+/*
+ * this is an interface to support bring-up of drivers
+ * on platforms with physical memory > 4G...
+ */
+int vm_himemory_mode = 0;
+
+
+/*
+ * this interface exists to support hardware controllers
+ * incapable of generating DMAs with more than 32 bits
+ * of address on platforms with physical memory > 4G...
+ */
+unsigned int vm_lopage_free_count = 0;
+unsigned int vm_lopage_max_count = 0;
+queue_head_t vm_lopage_queue_free;
+/*
+ * vm_page_grablo:
+ *
+ * Grab a page from vm_lopage_queue_free.
+ * When vm_lopage_poolsize is 0 this simply defers to vm_page_grab().
+ * Otherwise, with vm_page_queue_free_lock held, the first page on
+ * the queue is unlinked and its "free" bit cleared; VM_PAGE_NULL
+ * is returned if the queue is empty.
+ * vm_lopage_max_count keeps the largest value seen so far of
+ * (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count.
+ */
+vm_page_t
+vm_page_grablo(void)
+{
+ register vm_page_t mem;
+ unsigned int vm_lopage_alloc_count;
+
+ if (vm_lopage_poolsize == 0)
+ return (vm_page_grab());
+
+ mutex_lock(&vm_page_queue_free_lock);
+
+ if (! queue_empty(&vm_lopage_queue_free)) {
+ queue_remove_first(&vm_lopage_queue_free,
+ mem,
+ vm_page_t,
+ pageq);
+ assert(mem->free);
+ assert(mem->busy);
+ assert(!mem->pmapped);
+
+ mem->pageq.next = NULL;
+ mem->pageq.prev = NULL;
+ mem->free = FALSE;
+
+ vm_lopage_free_count--;
+ vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
+ if (vm_lopage_alloc_count > vm_lopage_max_count)
+ vm_lopage_max_count = vm_lopage_alloc_count;
+ } else {
+ mem = VM_PAGE_NULL;
+ }
+ mutex_unlock(&vm_page_queue_free_lock);
+
+ return (mem);
+}
+
+
/*
* vm_page_grab:
*
- * Remove a page from the free list.
- * Returns VM_PAGE_NULL if the free list is too small.
+ * first try to grab a page from the per-cpu free list...
+ * this must be done while pre-emption is disabled... if
+ * a page is available, we're done...
+ * if no page is available, grab the vm_page_queue_free_lock
+ * and see if current number of free pages would allow us
+ * to grab at least 1... if not, return VM_PAGE_NULL as before...
+ * if there are pages available, disable preemption and
+ * recheck the state of the per-cpu free list... we could
+ * have been preempted and moved to a different cpu, or
+ * some other thread could have re-filled it... if still
+ * empty, figure out how many pages we can steal from the
+ * global free queue and move to the per-cpu queue...
+ * return 1 of these pages when done... only wakeup the
+ * pageout_scan thread if we moved pages from the global
+ * list... no need for the wakeup if we've satisfied the
+ * request from the per-cpu queue.
*/
-unsigned long vm_page_grab_count = 0; /* measure demand */
+#define COLOR_GROUPS_TO_STEAL 4
+
vm_page_t
-vm_page_grab(void)
+vm_page_grab( void )
{
- register vm_page_t mem;
+ vm_page_t mem;
+
+
+ disable_preemption();
+
+ if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
+return_page_from_cpu_list:
+ PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
+ PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
+ mem->pageq.next = NULL;
+
+ enable_preemption();
+
+ assert(mem->listq.next == NULL && mem->listq.prev == NULL);
+ assert(mem->tabled == FALSE);
+ assert(mem->object == VM_OBJECT_NULL);
+ assert(!mem->laundry);
+ assert(!mem->free);
+ assert(pmap_verify_free(mem->phys_page));
+ assert(mem->busy);
+ assert(!mem->encrypted);
+ assert(!mem->pmapped);
+
+ return mem;
+ }
+ enable_preemption();
+
mutex_lock(&vm_page_queue_free_lock);
- vm_page_grab_count++;
/*
* Optionally produce warnings if the wire or gobble
* Only let privileged threads (involved in pageout)
* dip into the reserved pool.
*/
-
if ((vm_page_free_count < vm_page_free_reserved) &&
- !current_thread()->vm_privilege) {
+ !(current_thread()->options & TH_OPT_VMPRIV)) {
mutex_unlock(&vm_page_queue_free_lock);
mem = VM_PAGE_NULL;
- goto wakeup_pageout;
}
+ else {
+ vm_page_t head;
+ vm_page_t tail;
+ unsigned int pages_to_steal;
+ unsigned int color;
- while (vm_page_queue_free == VM_PAGE_NULL) {
- printf("vm_page_grab: no free pages, trouble expected...\n");
- mutex_unlock(&vm_page_queue_free_lock);
- VM_PAGE_WAIT();
- mutex_lock(&vm_page_queue_free_lock);
- }
+ while ( vm_page_free_count == 0 ) {
- if (--vm_page_free_count < vm_page_free_count_minimum)
- vm_page_free_count_minimum = vm_page_free_count;
- mem = vm_page_queue_free;
- vm_page_queue_free = (vm_page_t) mem->pageq.next;
- mem->free = FALSE;
- mem->no_isync = TRUE;
- mutex_unlock(&vm_page_queue_free_lock);
+ mutex_unlock(&vm_page_queue_free_lock);
+ /*
+ * must be a privileged thread to be
+ * in this state since a non-privileged
+ * thread would have bailed if we were
+ * under the vm_page_free_reserved mark
+ */
+ VM_PAGE_WAIT();
+ mutex_lock(&vm_page_queue_free_lock);
+ }
+
+ disable_preemption();
+
+ if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
+ mutex_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * we got preempted and moved to another processor
+ * or we got preempted and someone else ran and filled the cache
+ */
+ goto return_page_from_cpu_list;
+ }
+ if (vm_page_free_count <= vm_page_free_reserved)
+ pages_to_steal = 1;
+ else {
+ pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
+
+ if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
+ pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
+ }
+ color = PROCESSOR_DATA(current_processor(), start_color);
+ head = tail = NULL;
+
+ while (pages_to_steal--) {
+ if (--vm_page_free_count < vm_page_free_count_minimum)
+ vm_page_free_count_minimum = vm_page_free_count;
+
+ while (queue_empty(&vm_page_queue_free[color]))
+ color = (color + 1) & vm_color_mask;
+
+ queue_remove_first(&vm_page_queue_free[color],
+ mem,
+ vm_page_t,
+ pageq);
+ mem->pageq.next = NULL;
+ mem->pageq.prev = NULL;
+ color = (color + 1) & vm_color_mask;
+
+ if (head == NULL)
+ head = mem;
+ else
+ tail->pageq.next = (queue_t)mem;
+ tail = mem;
+
+ mem->pageq.prev = NULL;
+ assert(mem->listq.next == NULL && mem->listq.prev == NULL);
+ assert(mem->tabled == FALSE);
+ assert(mem->object == VM_OBJECT_NULL);
+ assert(!mem->laundry);
+ assert(mem->free);
+ mem->free = FALSE;
+
+ assert(pmap_verify_free(mem->phys_page));
+ assert(mem->busy);
+ assert(!mem->free);
+ assert(!mem->encrypted);
+ assert(!mem->pmapped);
+ }
+ PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
+ PROCESSOR_DATA(current_processor(), start_color) = color;
+
+ /*
+ * satisfy this request
+ */
+ PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
+ mem = head;
+ mem->pageq.next = NULL;
+
+ mutex_unlock(&vm_page_queue_free_lock);
+
+ enable_preemption();
+ }
/*
* Decide if we should poke the pageout daemon.
* We do this if the free count is less than the low
* We don't have the counts locked ... if they change a little,
* it doesn't really matter.
*/
-
-wakeup_pageout:
if ((vm_page_free_count < vm_page_free_min) ||
((vm_page_free_count < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
+ ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
+ thread_wakeup((event_t) &vm_page_free_wanted);
-// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
- return mem;
-}
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ vm_page_purgeable_count ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail <= (kern_memorystatus_level - 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
+
+// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
+
+ return mem;
+}
/*
* vm_page_release:
vm_page_release(
register vm_page_t mem)
{
-
+ unsigned int color;
#if 0
unsigned int pindex;
phys_entry *physent;
}
physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
#endif
-
assert(!mem->private && !mem->fictitious);
// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
mutex_lock(&vm_page_queue_free_lock);
+#if DEBUG
if (mem->free)
panic("vm_page_release");
+#endif
mem->free = TRUE;
- mem->pageq.next = (queue_entry_t) vm_page_queue_free;
- vm_page_queue_free = mem;
- vm_page_free_count++;
- /*
- * Check if we should wake up someone waiting for page.
- * But don't bother waking them unless they can allocate.
- *
- * We wakeup only one thread, to prevent starvation.
- * Because the scheduling system handles wait queues FIFO,
- * if we wakeup all waiting threads, one greedy thread
- * can starve multiple niceguy threads. When the threads
- * all wakeup, the greedy threads runs first, grabs the page,
- * and waits for another page. It will be the first to run
- * when the next page is freed.
- *
- * However, there is a slight danger here.
- * The thread we wake might not use the free page.
- * Then the other threads could wait indefinitely
- * while the page goes unused. To forestall this,
- * the pageout daemon will keep making free pages
- * as long as vm_page_free_wanted is non-zero.
- */
+ assert(mem->busy);
+ assert(!mem->laundry);
+ assert(mem->object == VM_OBJECT_NULL);
+ assert(mem->pageq.next == NULL &&
+ mem->pageq.prev == NULL);
+ assert(mem->listq.next == NULL &&
+ mem->listq.prev == NULL);
+
+ if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
+ /*
+ * this exists to support hardware controllers
+ * incapable of generating DMAs with more than 32 bits
+ * of address on platforms with physical memory > 4G...
+ */
+ queue_enter_first(&vm_lopage_queue_free,
+ mem,
+ vm_page_t,
+ pageq);
+ vm_lopage_free_count++;
+ } else {
+ color = mem->phys_page & vm_color_mask;
+ queue_enter_first(&vm_page_queue_free[color],
+ mem,
+ vm_page_t,
+ pageq);
+ vm_page_free_count++;
+ /*
+ * Check if we should wake up someone waiting for page.
+ * But don't bother waking them unless they can allocate.
+ *
+ * We wakeup only one thread, to prevent starvation.
+ * Because the scheduling system handles wait queues FIFO,
+ * if we wakeup all waiting threads, one greedy thread
+ * can starve multiple niceguy threads. When the threads
+ * all wakeup, the greedy thread runs first, grabs the page,
+ * and waits for another page. It will be the first to run
+ * when the next page is freed.
+ *
+ * However, there is a slight danger here.
+ * The thread we wake might not use the free page.
+ * Then the other threads could wait indefinitely
+ * while the page goes unused. To forestall this,
+ * the pageout daemon will keep making free pages
+ * as long as vm_page_free_wanted is non-zero.
+ */
- if ((vm_page_free_wanted > 0) &&
- (vm_page_free_count >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_count);
+ if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
+ vm_page_free_wanted_privileged--;
+ thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
+ } else if ((vm_page_free_wanted > 0) &&
+ (vm_page_free_count >= vm_page_free_reserved)) {
+ vm_page_free_wanted--;
+ thread_wakeup_one((event_t) &vm_page_free_count);
+ }
}
-
mutex_unlock(&vm_page_queue_free_lock);
-}
-#define VM_PAGEOUT_DEADLOCK_TIMEOUT 3
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
+
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ * Locking is not a big issue, as only a single thread delivers these.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ vm_page_purgeable_count ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail >= (kern_memorystatus_level + 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
+}
/*
* vm_page_wait:
* succeeds, the second fails. After the first page is freed,
* a call to vm_page_wait must really block.
*/
- uint64_t abstime;
kern_return_t wait_result;
- kern_return_t kr;
int need_wakeup = 0;
+ int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
mutex_lock(&vm_page_queue_free_lock);
+
+ if (is_privileged && vm_page_free_count) {
+ mutex_unlock(&vm_page_queue_free_lock);
+ return TRUE;
+ }
if (vm_page_free_count < vm_page_free_target) {
- if (vm_page_free_wanted++ == 0)
- need_wakeup = 1;
- wait_result = assert_wait((event_t)&vm_page_free_count,
- interruptible);
+
+ if (is_privileged) {
+ if (vm_page_free_wanted_privileged++ == 0)
+ need_wakeup = 1;
+ wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
+ } else {
+ if (vm_page_free_wanted++ == 0)
+ need_wakeup = 1;
+ wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
+ }
mutex_unlock(&vm_page_queue_free_lock);
counter(c_vm_page_wait_block++);
if (need_wakeup)
thread_wakeup((event_t)&vm_page_free_wanted);
- if (wait_result == THREAD_WAITING) {
- clock_interval_to_absolutetime_interval(
- VM_PAGEOUT_DEADLOCK_TIMEOUT,
- NSEC_PER_SEC, &abstime);
- clock_absolutetime_interval_to_deadline(
- abstime, &abstime);
- thread_set_timer_deadline(abstime);
+ if (wait_result == THREAD_WAITING)
wait_result = thread_block(THREAD_CONTINUE_NULL);
- if(wait_result == THREAD_TIMED_OUT) {
- kr = vm_pageout_emergency_availability_request();
- return TRUE;
- } else {
- thread_cancel_timer();
- }
- }
-
return(wait_result == THREAD_AWAKENED);
} else {
mutex_unlock(&vm_page_queue_free_lock);
{
register vm_page_t mem;
+ vm_object_lock_assert_exclusive(object);
mem = vm_page_grab();
if (mem == VM_PAGE_NULL)
return VM_PAGE_NULL;
return(mem);
}
+vm_page_t
+vm_page_alloclo(
+ vm_object_t object,
+ vm_object_offset_t offset)
+{ /*
+ * Grab a page with vm_page_grablo() and insert it into
+ * "object" at "offset". Returns VM_PAGE_NULL, inserting
+ * nothing, if no page could be grabbed. The object lock
+ * is asserted to be held exclusive on entry.
+ */
+ register vm_page_t mem;
+
+ vm_object_lock_assert_exclusive(object);
+ mem = vm_page_grablo();
+ if (mem == VM_PAGE_NULL)
+ return VM_PAGE_NULL;
+
+ vm_page_insert(mem, object, offset);
+
+ return(mem);
+}
+
+
+/*
+ * vm_page_alloc_guard:
+ *
+ * Allocate a fictitious page which will be used
+ * as a guard page. The page will be inserted into
+ * the object and returned to the caller.
+ *
+ * The page comes from vm_page_grab_guard(); if that yields
+ * VM_PAGE_NULL, nothing is inserted and VM_PAGE_NULL is returned.
+ * The object lock is asserted to be held exclusive on entry.
+ */
+
+vm_page_t
+vm_page_alloc_guard(
+ vm_object_t object,
+ vm_object_offset_t offset)
+{
+ register vm_page_t mem;
+
+ vm_object_lock_assert_exclusive(object);
+ mem = vm_page_grab_guard();
+ if (mem == VM_PAGE_NULL)
+ return VM_PAGE_NULL;
+
+ vm_page_insert(mem, object, offset);
+
+ return(mem);
+}
+
+
counter(unsigned int c_laundry_pages_freed = 0;)
-int vm_pagein_cluster_unused = 0;
-boolean_t vm_page_free_verify = FALSE;
+boolean_t vm_page_free_verify = TRUE;
/*
* vm_page_free:
*
* Object and page queues must be locked prior to entry.
*/
void
-vm_page_free(
+vm_page_free_prepare(
register vm_page_t mem)
{
- vm_object_t object = mem->object;
-
+ VM_PAGE_CHECK(mem);
assert(!mem->free);
assert(!mem->cleaning);
assert(!mem->pageout);
- assert(!vm_page_free_verify || pmap_verify_free(mem->phys_page));
+
+#if DEBUG
+ if (vm_page_free_verify && !mem->fictitious && !mem->private) {
+ assert(pmap_verify_free(mem->phys_page));
+ }
+ if (mem->object)
+ vm_object_lock_assert_exclusive(mem->object);
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+
+ if (mem->free)
+ panic("vm_page_free: freeing page on free list\n");
+#endif
+
+ if (mem->laundry) {
+ /*
+ * We may have to free a page while it's being laundered
+ * if we lost its pager (due to a forced unmount, for example).
+ * We need to call vm_pageout_throttle_up() before removing
+ * the page from its VM object, so that we can find out on
+ * which pageout queue the page is.
+ */
+ vm_pageout_throttle_up(mem);
+ counter(++c_laundry_pages_freed);
+ }
if (mem->tabled)
vm_page_remove(mem); /* clears tabled, object, offset */
- VM_PAGE_QUEUES_REMOVE(mem); /* clears active or inactive */
- if (mem->clustered) {
- mem->clustered = FALSE;
- vm_pagein_cluster_unused++;
- }
+ VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */
if (mem->wire_count) {
if (!mem->private && !mem->fictitious)
}
mem->gobbled = FALSE;
- if (mem->laundry) {
- extern int vm_page_laundry_min;
- if (!object->internal)
- vm_page_burst_count--;
- vm_page_laundry_count--;
- mem->laundry = FALSE; /* laundry is now clear */
- counter(++c_laundry_pages_freed);
- if (vm_page_laundry_count < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t) &vm_page_laundry_count);
- }
- }
-
- mem->discard_request = FALSE;
-
PAGE_WAKEUP(mem); /* clears wanted */
- if (mem->absent)
- vm_object_absent_release(object);
-
/* Some of these may be unnecessary */
- mem->page_lock = 0;
- mem->unlock_request = 0;
mem->busy = TRUE;
mem->absent = FALSE;
mem->error = FALSE;
mem->dirty = FALSE;
mem->precious = FALSE;
mem->reference = FALSE;
-
- mem->page_error = KERN_SUCCESS;
+ mem->encrypted = FALSE;
+ mem->encrypted_cleaning = FALSE;
+ mem->deactivated = FALSE;
+ mem->pmapped = FALSE;
if (mem->private) {
mem->private = FALSE;
mem->fictitious = TRUE;
mem->phys_page = vm_page_fictitious_addr;
}
- if (mem->fictitious) {
- vm_page_release_fictitious(mem);
- } else {
- /* depends on the queues lock */
- if(mem->zero_fill) {
- vm_zf_count-=1;
+ if (!mem->fictitious) {
+ if (mem->zero_fill == TRUE) {
mem->zero_fill = FALSE;
+ OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
}
vm_page_init(mem, mem->phys_page);
- vm_page_release(mem);
}
}
+void
+vm_page_free(
+ vm_page_t mem)
+{ /*
+ * Two-step free: vm_page_free_prepare() runs first, then the
+ * page goes to vm_page_release_fictitious() if it is marked
+ * fictitious at that point, or to vm_page_release() otherwise.
+ * vm_page_free_prepare() turns a private page into a fictitious
+ * one, so private pages take the fictitious path as well.
+ */
+ vm_page_free_prepare(mem);
+ if (mem->fictitious) {
+ vm_page_release_fictitious(mem);
+ } else {
+ vm_page_release(mem);
+ }
+}
+/*
+ * Free a list of pages. The list can be up to several hundred pages,
+ * as blocked up by vm_pageout_scan().
+ * The big win is not having to take the page q and free list locks once
+ * per page. We sort the incoming pages into n lists, one for
+ * each color.
+ *
+ * The page queues must be locked, and are kept locked.
+ */
void
vm_page_free_list(
- register vm_page_t mem)
+ vm_page_t mem)
{
- register vm_page_t nxt;
- register vm_page_t first = NULL;
- register vm_page_t last;
- register int pg_count = 0;
+ vm_page_t nxt;
+ int pg_count = 0;
+ int color;
+ int inuse_list_head = -1;
+ queue_head_t free_list[MAX_COLORS];
+ int inuse[MAX_COLORS];
+ for (color = 0; color < (signed) vm_colors; color++) {
+ queue_init(&free_list[color]);
+ }
+
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
while (mem) {
- nxt = (vm_page_t)(mem->pageq.next);
-
- if (mem->clustered)
- vm_pagein_cluster_unused++;
-
- if (mem->laundry) {
- extern int vm_page_laundry_min;
-
- if (!mem->object->internal)
- vm_page_burst_count--;
- vm_page_laundry_count--;
- counter(++c_laundry_pages_freed);
-
- if (vm_page_laundry_count < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t) &vm_page_laundry_count);
- }
+#if DEBUG
+ if (mem->tabled || mem->object)
+ panic("vm_page_free_list: freeing tabled page\n");
+ if (mem->inactive || mem->active || mem->throttled || mem->free)
+ panic("vm_page_free_list: freeing page on list\n");
+ if (vm_page_free_verify && !mem->fictitious && !mem->private) {
+ assert(pmap_verify_free(mem->phys_page));
}
- mem->busy = TRUE;
-
- PAGE_WAKEUP(mem); /* clears wanted */
-
- if (mem->private)
- mem->fictitious = TRUE;
+#endif
+ assert(mem->pageq.prev == NULL);
+ assert(mem->busy);
+ assert(!mem->free);
+ nxt = (vm_page_t)(mem->pageq.next);
if (!mem->fictitious) {
- /* depends on the queues lock */
- if (mem->zero_fill)
- vm_zf_count -= 1;
- vm_page_init(mem, mem->phys_page);
-
mem->free = TRUE;
- if (first == NULL)
- last = mem;
- mem->pageq.next = (queue_t) first;
- first = mem;
-
+ color = mem->phys_page & vm_color_mask;
+ if (queue_empty(&free_list[color])) {
+ inuse[color] = inuse_list_head;
+ inuse_list_head = color;
+ }
+ queue_enter_first(&free_list[color],
+ mem,
+ vm_page_t,
+ pageq);
pg_count++;
} else {
- mem->phys_page = vm_page_fictitious_addr;
+ assert(mem->phys_page == vm_page_fictitious_addr ||
+ mem->phys_page == vm_page_guard_addr);
vm_page_release_fictitious(mem);
}
mem = nxt;
}
- if (first) {
-
+ if (pg_count) {
+ unsigned int avail_free_count;
+
mutex_lock(&vm_page_queue_free_lock);
- last->pageq.next = (queue_entry_t) vm_page_queue_free;
- vm_page_queue_free = first;
+ color = inuse_list_head;
+
+ while( color != -1 ) {
+ vm_page_t first, last;
+ vm_page_t first_free;
+
+ first = (vm_page_t) queue_first(&free_list[color]);
+ last = (vm_page_t) queue_last(&free_list[color]);
+ first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
+ if (queue_empty(&vm_page_queue_free[color])) {
+ queue_last(&vm_page_queue_free[color]) =
+ (queue_entry_t) last;
+ } else {
+ queue_prev(&first_free->pageq) =
+ (queue_entry_t) last;
+ }
+ queue_first(&vm_page_queue_free[color]) =
+ (queue_entry_t) first;
+ queue_prev(&first->pageq) =
+ (queue_entry_t) &vm_page_queue_free[color];
+ queue_next(&last->pageq) =
+ (queue_entry_t) first_free;
+ color = inuse[color];
+ }
+
vm_page_free_count += pg_count;
+ avail_free_count = vm_page_free_count;
+
+ while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
+ vm_page_free_wanted_privileged--;
+ avail_free_count--;
+
+ thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
+ }
if ((vm_page_free_wanted > 0) &&
- (vm_page_free_count >= vm_page_free_reserved)) {
- int available_pages;
+ (avail_free_count >= vm_page_free_reserved)) {
+ unsigned int available_pages;
- available_pages = vm_page_free_count - vm_page_free_reserved;
+ if (avail_free_count >= vm_page_free_reserved) {
+ available_pages = (avail_free_count - vm_page_free_reserved);
+ } else {
+ available_pages = 0;
+ }
if (available_pages >= vm_page_free_wanted) {
vm_page_free_wanted = 0;
}
}
mutex_unlock(&vm_page_queue_free_lock);
+
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
+
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ vm_page_purgeable_count ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail >= (kern_memorystatus_level + 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
}
}
register vm_page_t mem)
{
-// dbgLog(current_act(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
+// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
VM_PAGE_CHECK(mem);
-
+#if DEBUG
+ if (mem->object)
+ vm_object_lock_assert_exclusive(mem->object);
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
if (mem->wire_count == 0) {
VM_PAGE_QUEUES_REMOVE(mem);
if (!mem->private && !mem->fictitious && !mem->gobbled)
if (mem->gobbled)
vm_page_gobble_count--;
mem->gobbled = FALSE;
- if(mem->zero_fill) {
- /* depends on the queues lock */
- vm_zf_count-=1;
+ if (mem->zero_fill == TRUE) {
mem->zero_fill = FALSE;
+ OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
}
+ /*
+ * ENCRYPTED SWAP:
+ * The page could be encrypted, but
+ * we don't have to decrypt it here
+ * because we don't guarantee that the
+ * data is actually valid at this point.
+ * The page will get decrypted in
+ * vm_fault_wire() if needed.
+ */
}
assert(!mem->gobbled);
mem->wire_count++;
vm_page_gobble(
register vm_page_t mem)
{
- vm_page_lock_queues();
+ vm_page_lockspin_queues();
VM_PAGE_CHECK(mem);
assert(!mem->gobbled);
register vm_page_t mem)
{
-// dbgLog(current_act(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
+// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
VM_PAGE_CHECK(mem);
assert(mem->wire_count > 0);
-
+#if DEBUG
+ if (mem->object)
+ vm_object_lock_assert_exclusive(mem->object);
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
if (--mem->wire_count == 0) {
assert(!mem->private && !mem->fictitious);
vm_page_wire_count--;
- queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
- vm_page_active_count++;
- mem->active = TRUE;
+ assert(!mem->laundry);
+ assert(mem->object != kernel_object);
+ assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
+ if (!IP_VALID(memory_manager_default) &&
+ mem->dirty && mem->object->internal &&
+ (mem->object->purgable == VM_PURGABLE_DENY ||
+ mem->object->purgable == VM_PURGABLE_NONVOLATILE)) {
+ queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
+ vm_page_throttled_count++;
+ mem->throttled = TRUE;
+ } else {
+ queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
+ vm_page_active_count++;
+ mem->active = TRUE;
+ }
mem->reference = TRUE;
}
}
+
/*
* vm_page_deactivate:
*
vm_page_deactivate(
register vm_page_t m)
{
+ boolean_t rapid_age = FALSE;
+
VM_PAGE_CHECK(m);
+ assert(m->object != kernel_object);
+ assert(m->phys_page != vm_page_guard_addr);
// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
-
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
/*
* This page is no longer very interesting. If it was
* interesting (active or inactive/referenced), then we
*/
if (m->gobbled) { /* can this happen? */
assert(m->wire_count == 0);
+
if (!m->private && !m->fictitious)
vm_page_wire_count--;
vm_page_gobble_count--;
}
if (m->private || (m->wire_count != 0))
return;
- if (m->active || (m->inactive && m->reference)) {
- if (!m->fictitious && !m->absent)
- pmap_clear_reference(m->phys_page);
- m->reference = FALSE;
- VM_PAGE_QUEUES_REMOVE(m);
+
+ if (m->active && m->deactivated == TRUE) {
+ if (!pmap_is_referenced(m->phys_page))
+ rapid_age = TRUE;
}
- if (m->wire_count == 0 && !m->inactive) {
- m->page_ticket = vm_page_ticket;
- vm_page_ticket_roll++;
+ if (rapid_age == FALSE && !m->fictitious && !m->absent)
+ pmap_clear_reference(m->phys_page);
- if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
- vm_page_ticket_roll = 0;
- if(vm_page_ticket == VM_PAGE_TICKET_ROLL_IDS)
- vm_page_ticket= 0;
- else
- vm_page_ticket++;
- }
-
- if(m->zero_fill) {
- queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
+ m->reference = FALSE;
+ m->deactivated = FALSE;
+ m->no_cache = FALSE;
+
+ if (!m->inactive) {
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ assert(!m->laundry);
+ assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+
+ if (!IP_VALID(memory_manager_default) &&
+ m->dirty && m->object->internal &&
+ (m->object->purgable == VM_PURGABLE_DENY ||
+ m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
+ queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
+ m->throttled = TRUE;
+ vm_page_throttled_count++;
} else {
- queue_enter(&vm_page_queue_inactive,
- m, vm_page_t, pageq);
+ if (rapid_age == TRUE ||
+ (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
+ vm_page_speculate(m, FALSE);
+ vm_page_speculative_recreated++;
+ return;
+ } else {
+ if (m->zero_fill) {
+ queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
+ vm_zf_queue_count++;
+ } else {
+ queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
+ }
+ }
+ m->inactive = TRUE;
+ if (!m->fictitious) {
+ vm_page_inactive_count++;
+ token_new_pagecount++;
+ }
}
-
- m->inactive = TRUE;
- if (!m->fictitious)
- vm_page_inactive_count++;
}
}
register vm_page_t m)
{
VM_PAGE_CHECK(m);
-
+#ifdef FIXME_4778297
+ assert(m->object != kernel_object);
+#endif
+ assert(m->phys_page != vm_page_guard_addr);
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
if (m->gobbled) {
assert(m->wire_count == 0);
if (!m->private && !m->fictitious)
if (m->private)
return;
- if (m->inactive) {
- if (m->zero_fill) {
- queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
+#if DEBUG
+ if (m->active)
+ panic("vm_page_activate: already active");
+#endif
+
+ if (m->speculative) {
+ DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
+ DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
+ }
+
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ if (m->wire_count == 0) {
+ assert(!m->laundry);
+ assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+ if (!IP_VALID(memory_manager_default) &&
+ !m->fictitious && m->dirty && m->object->internal &&
+ (m->object->purgable == VM_PURGABLE_DENY ||
+ m->object->purgable == VM_PURGABLE_NONVOLATILE)) {
+ queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
+ m->throttled = TRUE;
+ vm_page_throttled_count++;
} else {
- queue_remove(&vm_page_queue_inactive,
- m, vm_page_t, pageq);
+ queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
+ m->active = TRUE;
+ if (!m->fictitious)
+ vm_page_active_count++;
}
- if (!m->fictitious)
- vm_page_inactive_count--;
- m->inactive = FALSE;
+ m->reference = TRUE;
+ m->no_cache = FALSE;
}
+}
+
+
+/*
+ * vm_page_speculate:
+ *
+ * Put the specified page on the speculative list (if appropriate).
+ *
+ * Nothing is queued when the page is wired (wire_count != 0).
+ * Otherwise the page is appended to the aging bin selected by
+ * speculative_age_index. When no speculative pages exist, both
+ * the age and steal indices are reset to the first aging bin and
+ * its expiry time is restarted. When the current bin's expiry
+ * time has passed, the age index advances (wrapping around, and
+ * pushing the steal index ahead if the two collide), any pages
+ * still in the newly selected bin are handed to
+ * vm_page_speculate_ageit(), and the bin's expiry is restarted.
+ *
+ * "new" == TRUE additionally bumps the object's pages_created
+ * count and vm_page_speculative_created.
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_speculate(
+ vm_page_t m,
+ boolean_t new)
+{
+ struct vm_speculative_age_q *aq;
+
+ VM_PAGE_CHECK(m);
+ assert(m->object != kernel_object);
+ assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
+ assert(m->phys_page != vm_page_guard_addr);
+ assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
if (m->wire_count == 0) {
- if (m->active)
- panic("vm_page_activate: already active");
+ mach_timespec_t ts;
- queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
- m->active = TRUE;
- m->reference = TRUE;
- if (!m->fictitious)
- vm_page_active_count++;
+ clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
+
+ if (vm_page_speculative_count == 0) {
+
+ speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+ speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+
+ aq = &vm_page_queue_speculative[speculative_age_index];
+
+ /*
+ * set the timer to begin a new group
+ */
+ aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
+ aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
+
+ ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
+ } else {
+ aq = &vm_page_queue_speculative[speculative_age_index];
+
+ if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
+
+ speculative_age_index++;
+
+ if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
+ speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+ if (speculative_age_index == speculative_steal_index) {
+ speculative_steal_index = speculative_age_index + 1;
+
+ if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
+ speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
+ }
+ aq = &vm_page_queue_speculative[speculative_age_index];
+
+ if (!queue_empty(&aq->age_q))
+ vm_page_speculate_ageit(aq);
+
+ aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
+ aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
+
+ ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
+ }
+ }
+ enqueue_tail(&aq->age_q, &m->pageq);
+ m->speculative = TRUE;
+ vm_page_speculative_count++;
+
+ if (new == TRUE) {
+ m->object->pages_created++;
+ vm_page_speculative_created++;
+ }
}
}
+
+/*
+ * move pages from the specified aging bin to
+ * the speculative bin that pageout_scan claims from
+ *
+ * The whole list hanging off "aq" is spliced after the current
+ * last entry of the VM_PAGE_SPECULATIVE_AGED_Q bin by relinking
+ * the end pointers (no per-page walk), and "aq" is then reset
+ * to empty with queue_init().
+ *
+ * Assumes "aq" is not empty: its first and last entries are
+ * dereferenced as pages. The caller in vm_page_speculate()
+ * checks queue_empty() before calling.
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
+{
+ struct vm_speculative_age_q *sq;
+ vm_page_t t;
+
+ sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
+
+ if (queue_empty(&sq->age_q)) {
+ sq->age_q.next = aq->age_q.next;
+ sq->age_q.prev = aq->age_q.prev;
+
+ t = (vm_page_t)sq->age_q.next;
+ t->pageq.prev = &sq->age_q;
+
+ t = (vm_page_t)sq->age_q.prev;
+ t->pageq.next = &sq->age_q;
+ } else {
+ t = (vm_page_t)sq->age_q.prev;
+ t->pageq.next = aq->age_q.next;
+
+ t = (vm_page_t)aq->age_q.next;
+ t->pageq.prev = sq->age_q.prev;
+
+ t = (vm_page_t)aq->age_q.prev;
+ t->pageq.next = &sq->age_q;
+
+ sq->age_q.prev = aq->age_q.prev;
+ }
+ queue_init(&aq->age_q);
+}
+
+/*
+ * vm_page_lru:
+ *
+ * Requeue the page on vm_page_queue_inactive.
+ * Nothing is done if the page is active, referenced, private
+ * or wired. Otherwise its no_cache bit is cleared, it is taken
+ * off its current paging queue with VM_PAGE_QUEUES_REMOVE(),
+ * entered on the inactive queue and marked inactive, and both
+ * vm_page_inactive_count and token_new_pagecount are incremented.
+ *
+ * The page queues must be locked (asserted under DEBUG).
+ */
+void
+vm_page_lru(
+ vm_page_t m)
+{
+ VM_PAGE_CHECK(m);
+ assert(m->object != kernel_object);
+ assert(m->phys_page != vm_page_guard_addr);
+
+#if DEBUG
+ _mutex_assert(&vm_page_queue_lock, MA_OWNED);
+#endif
+ if (m->active || m->reference)
+ return;
+
+ if (m->private || (m->wire_count != 0))
+ return;
+
+ m->no_cache = FALSE;
+
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ assert(!m->laundry);
+ assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+
+ queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
+ m->inactive = TRUE;
+
+ vm_page_inactive_count++;
+ token_new_pagecount++;
+}
+
+
/*
* vm_page_part_zero_fill:
*
* vm_page_copy:
*
* Copy one page to another
+ *
+ * ENCRYPTED SWAP:
+ * The source page should not be encrypted. The caller should
+ * make sure the page is decrypted first, if necessary.
*/
+int vm_page_copy_cs_validations = 0;
+int vm_page_copy_cs_tainted = 0;
+
void
vm_page_copy(
vm_page_t src_m,
VM_PAGE_CHECK(src_m);
VM_PAGE_CHECK(dest_m);
- pmap_copy_page(src_m->phys_page, dest_m->phys_page);
-}
-
-/*
- * Currently, this is a primitive allocator that grabs
- * free pages from the system, sorts them by physical
- * address, then searches for a region large enough to
- * satisfy the user's request.
- *
- * Additional levels of effort:
- * + steal clean active/inactive pages
- * + force pageouts of dirty pages
- * + maintain a map of available physical
- * memory
- */
-
-#define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n))
-
-#if MACH_ASSERT
-int vm_page_verify_contiguous(
- vm_page_t pages,
- unsigned int npages);
-#endif /* MACH_ASSERT */
-
-cpm_counter(unsigned int vpfls_pages_handled = 0;)
-cpm_counter(unsigned int vpfls_head_insertions = 0;)
-cpm_counter(unsigned int vpfls_tail_insertions = 0;)
-cpm_counter(unsigned int vpfls_general_insertions = 0;)
-cpm_counter(unsigned int vpfc_failed = 0;)
-cpm_counter(unsigned int vpfc_satisfied = 0;)
+ /*
+ * ENCRYPTED SWAP:
+ * The source page should not be encrypted at this point.
+ * The destination page will therefore not contain encrypted
+ * data after the copy.
+ */
+ if (src_m->encrypted) {
+ panic("vm_page_copy: source page %p is encrypted\n", src_m);
+ }
+ dest_m->encrypted = FALSE;
+ if (src_m->object != VM_OBJECT_NULL &&
+ src_m->object->code_signed &&
+ !src_m->cs_validated) {
+ /*
+ * We're copying a not-yet-validated page from a
+ * code-signed object.
+ * Whoever ends up mapping the copy page might care about
+ * the original page's integrity, so let's validate the
+ * source page now.
+ */
+ vm_page_copy_cs_validations++;
+ vm_page_validate_cs(src_m);
+ }
+ /*
+ * Propagate the code-signing bits to the copy page.
+ */
+ dest_m->cs_validated = src_m->cs_validated;
+ dest_m->cs_tainted = src_m->cs_tainted;
+ if (dest_m->cs_tainted) {
+ assert(dest_m->cs_validated);
+ vm_page_copy_cs_tainted++;
+ }
+ pmap_copy_page(src_m->phys_page, dest_m->phys_page);
+}
-#if MACH_ASSERT
+#if MACH_ASSERT
/*
* Check that the list of pages is ordered by
* ascending physical address and has no holes.
*/
-int
+static int
vm_page_verify_contiguous(
vm_page_t pages,
unsigned int npages)
{
register vm_page_t m;
unsigned int page_count;
- ppnum_t prev_addr;
+ vm_offset_t prev_addr;
prev_addr = pages->phys_page;
page_count = 1;
for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
if (m->phys_page != prev_addr + 1) {
- printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n",
+ printf("m %p prev_addr 0x%x, current addr 0x%x\n",
m, prev_addr, m->phys_page);
- printf("pages 0x%x page_count %u\n", pages, page_count);
+ printf("pages %p page_count %d\n", pages, page_count);
panic("vm_page_verify_contiguous: not contiguous!");
}
prev_addr = m->phys_page;
++page_count;
}
if (page_count != npages) {
- printf("pages 0x%x actual count 0x%x but requested 0x%x\n",
+ printf("pages %p actual count 0x%x but requested 0x%x\n",
pages, page_count, npages);
panic("vm_page_verify_contiguous: count error");
}
#endif /* MACH_ASSERT */
+#if MACH_ASSERT
/*
- * Find a region large enough to contain at least npages
+ * Check the free lists for proper length etc.
+ */
+static void
+vm_page_verify_free_lists( void )
+{
+ unsigned int color, npages;
+ vm_page_t m;
+ vm_page_t prev_m;
+ /*
+ * Consistency check of the per-color free queues: every queue is
+ * walked while holding vm_page_queue_free_lock, and we panic on the
+ * first page whose back-link, free/busy bits or color is wrong.
+ * Finally the number of pages seen across all colors must equal
+ * vm_page_free_count.  Nothing is modified.
+ */
+ npages = 0;
+
+ mutex_lock(&vm_page_queue_free_lock);
+
+ for( color = 0; color < vm_colors; color++ ) {
+ prev_m = (vm_page_t) &vm_page_queue_free[color]; /* the first page's prev link is compared against the queue head itself */
+ queue_iterate(&vm_page_queue_free[color],
+ m,
+ vm_page_t,
+ pageq) {
+ if ((vm_page_t) m->pageq.prev != prev_m) /* back-link must name the element we just visited */
+ panic("vm_page_verify_free_lists: corrupted prev ptr");
+ if ( ! m->free )
+ panic("vm_page_verify_free_lists: not free");
+ if ( ! m->busy ) /* a page on a free queue is expected to be marked busy as well */
+ panic("vm_page_verify_free_lists: not busy");
+ if ( (m->phys_page & vm_color_mask) != color) /* masked physical page number must match the queue index */
+ panic("vm_page_verify_free_lists: wrong color");
+ ++npages;
+ prev_m = m;
+ }
+ }
+ if (npages != vm_page_free_count) /* total over all colors vs. the global free counter */
+ panic("vm_page_verify_free_lists: npages %u free_count %d",
+ npages, vm_page_free_count);
+
+ mutex_unlock(&vm_page_queue_free_lock);
+}
+#endif /* MACH_ASSERT */
+
+
+
+/*
+ * CONTIGUOUS PAGE ALLOCATION
+ * Additional levels of effort:
+ * + consider pages that are currently 'pmapped'
+ * this could be expensive since we'd have
+ * to ask the pmap layer about their state
+ * + consider dirty pages
+ * either clean them or
+ * copy them to other locations...
+ *
+ * Find a region large enough to contain at least n pages
* of contiguous physical memory.
*
+ * This is done by traversing the vm_page_t array in a linear fashion
+ * we assume that the vm_page_t array has the available physical pages in an
+ * ordered, ascending list... this is currently true of all our implementations
+ * and must remain so... there can be 'holes' in the array... we also can
+ * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
+ * which used to happen via 'vm_page_convert'... that function was no longer
+ * being called and was removed...
+ *
+ * The basic flow consists of stabilizing some of the interesting state of
+ * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
+ * sweep at the beginning of the array looking for pages that meet our criteria
+ * for a 'stealable' page... currently we are pretty conservative... if the page
+ * meets these criteria and is physically contiguous to the previous page in the 'run'
+ * we keep developing it. If we hit a page that doesn't fit, we reset our state
+ * and start to develop a new run... if at this point we've already considered
+ * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
+ * and mutex_pause (which will yield the processor), to keep the latency low w/r
+ * to other threads trying to acquire free pages (or move pages from q to q),
+ * and then continue from the spot we left off... we only make 1 pass through the
+ * array. Once we have a 'run' that is long enough, we'll go into the loop
+ * which steals the pages from the queues they're currently on... pages on the free
+ * queue can be stolen directly... pages that are on any of the other queues
+ * must be removed from the object they are tabled on... this requires taking the
+ * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
+ * or if the state of the page behind the vm_object lock is no longer viable, we'll
+ * dump the pages we've currently stolen back to the free list, and pick up our
+ * scan from the point where we aborted the 'current' run.
+ *
+ *
* Requirements:
- * - Called while holding vm_page_queue_free_lock.
- * - Doesn't respect vm_page_free_reserved; caller
- * must not ask for more pages than are legal to grab.
+ * - neither the vm_page_queue lock nor vm_page_queue_free_lock can be held on entry (both are taken here)
*
- * Returns a pointer to a list of gobbled pages or VM_PAGE_NULL.
+ * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
*
* Algorithm:
- * Loop over the free list, extracting one page at a time and
- * inserting those into a sorted sub-list. We stop as soon as
- * there's a contiguous range within the sorted list that can
- * satisfy the contiguous memory request. This contiguous sub-
- * list is chopped out of the sorted sub-list and the remainder
- * of the sorted sub-list is put back onto the beginning of the
- * free list.
*/
+
+#define MAX_CONSIDERED_BEFORE_YIELD 1000 /* pages examined by the scan before it drops both locks and calls mutex_pause */
+
+ /*
+ * Forget the run currently being developed by vm_page_find_contiguous.
+ * Expands in place and writes the caller's locals prevcontaddr,
+ * free_considered, substitute_needed and npages.
+ * NOTE(review): the scan extends a run when phys_page == prevcontaddr + 1;
+ * -2 is presumably chosen so that this sum can never equal physical
+ * page 0 - confirm against the definition of ppnum_t.
+ */
+#define RESET_STATE_OF_RUN() \
+ MACRO_BEGIN \
+ prevcontaddr = -2; \
+ free_considered = 0; \
+ substitute_needed = 0; \
+ npages = 0; \
+ MACRO_END
+
+
static vm_page_t
vm_page_find_contiguous(
- unsigned int contig_pages)
+ unsigned int contig_pages,
+ ppnum_t max_pnum,
+ boolean_t wire)
{
- vm_page_t sort_list;
- vm_page_t *contfirstprev, contlast;
- vm_page_t m, m1;
+ vm_page_t m = NULL;
ppnum_t prevcontaddr;
- ppnum_t nextcontaddr;
- unsigned int npages;
-
-#if MACH_ASSERT
- /*
- * Verify pages in the free list..
- */
- npages = 0;
- for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
- ++npages;
- if (npages != vm_page_free_count)
- panic("vm_sort_free_list: prelim: npages %u free_count %d",
- npages, vm_page_free_count);
-#endif /* MACH_ASSERT */
+ unsigned int npages, considered;
+ unsigned int page_idx, start_idx;
+ int free_considered, free_available;
+ int substitute_needed;
+#if MACH_ASSERT
+ uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
+ int yielded = 0;
+ int dumped_run = 0;
+ int stolen_pages = 0;
+#endif
- if (contig_pages == 0 || vm_page_queue_free == VM_PAGE_NULL)
+ if (contig_pages == 0)
return VM_PAGE_NULL;
-#define PPNUM_PREV(x) (((x) > 0) ? ((x) - 1) : 0)
-#define PPNUM_NEXT(x) (((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX)
+#if MACH_ASSERT
+ vm_page_verify_free_lists();
+
+ clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
+#endif
+ vm_page_lock_queues();
+ mutex_lock(&vm_page_queue_free_lock);
- npages = 1;
- contfirstprev = &sort_list;
- contlast = sort_list = vm_page_queue_free;
- vm_page_queue_free = NEXT_PAGE(sort_list);
- SET_NEXT_PAGE(sort_list, VM_PAGE_NULL);
- prevcontaddr = PPNUM_PREV(sort_list->phys_page);
- nextcontaddr = PPNUM_NEXT(sort_list->phys_page);
+ RESET_STATE_OF_RUN();
- while (npages < contig_pages &&
- (m = vm_page_queue_free) != VM_PAGE_NULL)
- {
- cpm_counter(++vpfls_pages_handled);
+ considered = 0;
+ free_available = vm_page_free_count - vm_page_free_reserved;
- /* prepend to existing run? */
- if (m->phys_page == prevcontaddr)
- {
- vm_page_queue_free = NEXT_PAGE(m);
- cpm_counter(++vpfls_head_insertions);
- prevcontaddr = PPNUM_PREV(prevcontaddr);
- SET_NEXT_PAGE(m, *contfirstprev);
- *contfirstprev = m;
- npages++;
- continue; /* no tail expansion check needed */
- }
-
- /* append to tail of existing run? */
- else if (m->phys_page == nextcontaddr)
- {
- vm_page_queue_free = NEXT_PAGE(m);
- cpm_counter(++vpfls_tail_insertions);
- nextcontaddr = PPNUM_NEXT(nextcontaddr);
- SET_NEXT_PAGE(m, NEXT_PAGE(contlast));
- SET_NEXT_PAGE(contlast, m);
- contlast = m;
- npages++;
- }
+ for (page_idx = 0, start_idx = 0;
+ npages < contig_pages && page_idx < vm_pages_count;
+ page_idx++) {
+retry:
+ m = &vm_pages[page_idx];
- /* prepend to the very front of sorted list? */
- else if (m->phys_page < sort_list->phys_page)
- {
- vm_page_queue_free = NEXT_PAGE(m);
- cpm_counter(++vpfls_general_insertions);
- prevcontaddr = PPNUM_PREV(m->phys_page);
- nextcontaddr = PPNUM_NEXT(m->phys_page);
- SET_NEXT_PAGE(m, sort_list);
- contfirstprev = &sort_list;
- contlast = sort_list = m;
- npages = 1;
+ if (max_pnum && m->phys_page > max_pnum) {
+ /* no more low pages... */
+ break;
}
+ if (m->phys_page <= vm_lopage_poolend &&
+ m->phys_page >= vm_lopage_poolstart) {
+ /*
+ * don't want to take pages from our
+ * reserved pool of low memory
+ * so don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
- else /* get to proper place for insertion */
- {
- if (m->phys_page < nextcontaddr)
- {
- prevcontaddr = PPNUM_PREV(sort_list->phys_page);
- nextcontaddr = PPNUM_NEXT(sort_list->phys_page);
- contfirstprev = &sort_list;
- contlast = sort_list;
+ } else if (m->wire_count || m->gobbled ||
+ m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
+ m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
+ m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
+ /*
+ * page is in a transient state
+ * or a state we don't want to deal
+ * with, so don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
+ /*
+ * page needs to be on one of our queues
+ * in order for it to be stable behind the
+ * locks we hold at this point...
+ * if not, don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else if (!m->free && (!m->tabled || m->busy)) {
+ /*
+ * pages on the free list are always 'busy'
+ * so we couldn't test for 'busy' in the check
+ * for the transient states... pages that are
+ * 'free' are never 'tabled', so we also couldn't
+ * test for 'tabled'. So we check here to make
+ * sure that a non-free page is not busy and is
+ * tabled on an object...
+ * if not, don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else {
+ if (m->phys_page != prevcontaddr + 1) {
npages = 1;
+ start_idx = page_idx;
+ } else {
+ npages++;
}
- for (m1 = NEXT_PAGE(contlast);
- npages < contig_pages &&
- m1 != VM_PAGE_NULL && m1->phys_page < m->phys_page;
- m1 = NEXT_PAGE(m1))
- {
- if (m1->phys_page != nextcontaddr) {
- prevcontaddr = PPNUM_PREV(m1->phys_page);
- contfirstprev = NEXT_PAGE_PTR(contlast);
- npages = 1;
- } else {
- npages++;
+ prevcontaddr = m->phys_page;
+
+ if (m->pmapped || m->dirty)
+ substitute_needed++;
+
+ if (m->free) {
+ free_considered++;
+ }
+ if ((free_considered + substitute_needed) > free_available) {
+ /*
+ * if we let this run continue
+ * we will end up dropping the vm_page_free_count
+ * below the reserve limit... we need to abort
+ * this run, but we can at least re-consider this
+ * page... thus the jump back to 'retry'
+ */
+ RESET_STATE_OF_RUN();
+
+ if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
+ considered++;
+ goto retry;
}
- nextcontaddr = PPNUM_NEXT(m1->phys_page);
- contlast = m1;
+ /*
+ * free_available == 0
+ * so can't consider any free pages... if
+ * we went to retry in this case, we'd
+ * get stuck looking at the same page
+ * w/o making any forward progress
+ * we also want to take this path if we've already
+ * reached our limit that controls the lock latency
+ */
}
+ }
+ if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
+
+ mutex_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+
+ mutex_pause(0);
+
+ vm_page_lock_queues();
+ mutex_lock(&vm_page_queue_free_lock);
+ RESET_STATE_OF_RUN();
/*
- * We may actually already have enough.
- * This could happen if a previous prepend
- * joined up two runs to meet our needs.
- * If so, bail before we take the current
- * page off the free queue.
+ * reset our free page limit since we
+ * dropped the lock protecting the vm_page_free_queue
*/
- if (npages == contig_pages)
- break;
+ free_available = vm_page_free_count - vm_page_free_reserved;
+ considered = 0;
+#if MACH_ASSERT
+ yielded++;
+#endif
+ goto retry;
+ }
+ considered++;
+ }
+ m = VM_PAGE_NULL;
+
+ if (npages != contig_pages)
+ mutex_unlock(&vm_page_queue_free_lock);
+ else {
+ vm_page_t m1;
+ vm_page_t m2;
+ unsigned int cur_idx;
+ unsigned int tmp_start_idx;
+ vm_object_t locked_object = VM_OBJECT_NULL;
+ boolean_t abort_run = FALSE;
+
+ tmp_start_idx = start_idx;
+
+ /*
+ * first pass through to pull the free pages
+ * off of the free queue so that in case we
+ * need substitute pages, we won't grab any
+ * of the free pages in the run... we'll clear
+ * the 'free' bit in the 2nd pass, and even in
+ * an abort_run case, we'll collect all of the
+ * free pages in this run and return them to the free list
+ */
+ while (start_idx < page_idx) {
+
+ m1 = &vm_pages[start_idx++];
+
+ if (m1->free) {
+ unsigned int color;
+
+ color = m1->phys_page & vm_color_mask;
+ queue_remove(&vm_page_queue_free[color],
+ m1,
+ vm_page_t,
+ pageq);
+
+ vm_page_free_count--;
+ }
+ }
+ /*
+ * adjust global freelist counts
+ */
+ if (vm_page_free_count < vm_page_free_count_minimum)
+ vm_page_free_count_minimum = vm_page_free_count;
+
+ /*
+ * we can drop the free queue lock at this point since
+ * we've pulled any 'free' candidates off of the list
+ * we need it dropped so that we can do a vm_page_grab
+ * when substituting for pmapped/dirty pages
+ */
+ mutex_unlock(&vm_page_queue_free_lock);
+
+ start_idx = tmp_start_idx;
+ cur_idx = page_idx - 1;
+
+ while (start_idx++ < page_idx) {
+ /*
+ * must go through the list from back to front
+ * so that the page list is created in the
+ * correct order - low -> high phys addresses
+ */
+ m1 = &vm_pages[cur_idx--];
+
+ if (m1->free) {
+ /*
+ * pages have already been removed from
+ * the free list in the 1st pass
+ */
+ assert(m1->free);
+ assert(m1->busy);
+ assert(!m1->wanted);
+ assert(!m1->laundry);
+ m1->free = FALSE;
- if (m->phys_page != nextcontaddr) {
- contfirstprev = NEXT_PAGE_PTR(contlast);
- prevcontaddr = PPNUM_PREV(m->phys_page);
- nextcontaddr = PPNUM_NEXT(m->phys_page);
- npages = 1;
} else {
- nextcontaddr = PPNUM_NEXT(nextcontaddr);
- npages++;
+ vm_object_t object;
+
+ if (abort_run == TRUE)
+ continue;
+
+ object = m1->object;
+
+ if (object != locked_object) {
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ if (vm_object_lock_try(object))
+ locked_object = object;
+ }
+ if (locked_object == VM_OBJECT_NULL ||
+ (m1->wire_count || m1->gobbled ||
+ m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
+ m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
+ m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
+
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ tmp_start_idx = cur_idx;
+ abort_run = TRUE;
+ continue;
+ }
+ if (m1->pmapped || m1->dirty) {
+ int refmod;
+ vm_object_offset_t offset;
+
+ m2 = vm_page_grab();
+
+ if (m2 == VM_PAGE_NULL) {
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ tmp_start_idx = cur_idx;
+ abort_run = TRUE;
+ continue;
+ }
+ if (m1->pmapped)
+ refmod = pmap_disconnect(m1->phys_page);
+ else
+ refmod = 0;
+ vm_page_copy(m1, m2);
+
+ m2->reference = m1->reference;
+ m2->dirty = m1->dirty;
+
+ if (refmod & VM_MEM_REFERENCED)
+ m2->reference = TRUE;
+ if (refmod & VM_MEM_MODIFIED)
+ m2->dirty = TRUE;
+ offset = m1->offset;
+
+ /*
+ * completely cleans up the state
+ * of the page so that it is ready
+ * to be put onto the free list, or
+ * for this purpose it looks like it
+ * just came off of the free list
+ */
+ vm_page_free_prepare(m1);
+
+ /*
+ * make sure we clear the ref/mod state
+ * from the pmap layer... else we risk
+ * inheriting state from the last time
+ * this page was used...
+ */
+ pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ /*
+ * now put the substitute page on the object
+ */
+ vm_page_insert_internal(m2, locked_object, offset, TRUE);
+
+ if (m2->reference)
+ vm_page_activate(m2);
+ else
+ vm_page_deactivate(m2);
+
+ PAGE_WAKEUP_DONE(m2);
+
+ } else {
+ /*
+ * completely cleans up the state
+ * of the page so that it is ready
+ * to be put onto the free list, or
+ * for this purpose it looks like it
+ * just came off of the free list
+ */
+ vm_page_free_prepare(m1);
+ }
+#if MACH_ASSERT
+ stolen_pages++;
+#endif
}
- vm_page_queue_free = NEXT_PAGE(m);
- cpm_counter(++vpfls_general_insertions);
- SET_NEXT_PAGE(m, NEXT_PAGE(contlast));
- SET_NEXT_PAGE(contlast, m);
- contlast = m;
+ m1->pageq.next = (queue_entry_t) m;
+ m1->pageq.prev = NULL;
+ m = m1;
}
-
- /* See how many pages are now contiguous after the insertion */
- for (m1 = NEXT_PAGE(m);
- npages < contig_pages &&
- m1 != VM_PAGE_NULL && m1->phys_page == nextcontaddr;
- m1 = NEXT_PAGE(m1))
- {
- nextcontaddr = PPNUM_NEXT(nextcontaddr);
- contlast = m1;
- npages++;
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
}
- }
- /* how did we do? */
- if (npages == contig_pages)
- {
- cpm_counter(++vpfc_satisfied);
+ if (abort_run == TRUE) {
+ /*
+ * the run could not be completed... hand back whatever
+ * we had already collected on the private list 'm'
+ */
+ if (m != VM_PAGE_NULL) {
+ vm_page_free_list(m);
+ /*
+ * those pages are no longer ours... forget the list
+ * so that the 'goto done_scanning' exit below returns
+ * VM_PAGE_NULL rather than a pointer to pages that
+ * were just freed (the 'retry' path reloads m anyway)
+ */
+ m = VM_PAGE_NULL;
+ }
+#if MACH_ASSERT
+ dumped_run++;
+#endif
+ /*
+ * want the index of the last
+ * page in this run that was
+ * successfully 'stolen', so back
+ * it up 1 for the auto-decrement on use
+ * and 1 more to bump back over this page
+ */
+ page_idx = tmp_start_idx + 2;
+
+ if (page_idx >= vm_pages_count)
+ goto done_scanning;
+
+ mutex_lock(&vm_page_queue_free_lock);
- /* remove the contiguous range from the sorted list */
- m = *contfirstprev;
- *contfirstprev = NEXT_PAGE(contlast);
- SET_NEXT_PAGE(contlast, VM_PAGE_NULL);
- assert(vm_page_verify_contiguous(m, npages));
+ RESET_STATE_OF_RUN();
+
+ /*
+ * reset our free page limit since we
+ * dropped the lock protecting the vm_page_free_queue
+ */
+ free_available = vm_page_free_count - vm_page_free_reserved;
+
+ goto retry;
+ }
- /* inline vm_page_gobble() for each returned page */
for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
- assert(m1->free);
- assert(!m1->wanted);
- m1->free = FALSE;
- m1->no_isync = TRUE;
- m1->gobbled = TRUE;
+
+ if (wire == TRUE)
+ m1->wire_count++;
+ else
+ m1->gobbled = TRUE;
}
+ if (wire == FALSE)
+ vm_page_gobble_count += npages;
+
+ /*
+ * gobbled pages are also counted as wired pages
+ */
vm_page_wire_count += npages;
- vm_page_gobble_count += npages;
- vm_page_free_count -= npages;
- /* stick free list at the tail of the sorted list */
- while ((m1 = *contfirstprev) != VM_PAGE_NULL)
- contfirstprev = (vm_page_t *)&m1->pageq.next;
- *contfirstprev = vm_page_queue_free;
+ assert(vm_page_verify_contiguous(m, npages));
+ }
+done_scanning:
+ vm_page_unlock_queues();
+
+#if MACH_ASSERT
+ /*
+ * debug statistics: compute elapsed time (end - start) as
+ * separate second / microsecond fields, then log a summary
+ * of the scan and re-verify the free lists
+ */
+ clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
+
+ tv_end_sec -= tv_start_sec;
+ if (tv_end_usec < tv_start_usec) {
+ /* borrow a second so the usec subtraction can't wrap */
+ tv_end_sec--;
+ tv_end_usec += 1000000;
}
+ tv_end_usec -= tv_start_usec;
+ if (tv_end_usec >= 1000000) {
+ /* carry a whole second out of the microsecond field */
+ tv_end_sec++;
+ tv_end_usec -= 1000000;
+ }
+ printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
+ contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
- vm_page_queue_free = sort_list;
+ vm_page_verify_free_lists();
+#endif
return m;
}
cpm_allocate(
vm_size_t size,
vm_page_t *list,
+ ppnum_t max_pnum,
boolean_t wire)
{
- register vm_page_t m;
- vm_page_t *first_contig;
- vm_page_t free_list, pages;
- unsigned int npages, n1pages;
- int vm_pages_available;
- boolean_t wakeup;
+ vm_page_t pages;
+ unsigned int npages;
if (size % page_size != 0)
return KERN_INVALID_ARGUMENT;
- vm_page_lock_queues();
- mutex_lock(&vm_page_queue_free_lock);
-
- /*
- * Should also take active and inactive pages
- * into account... One day...
- */
npages = size / page_size;
- vm_pages_available = vm_page_free_count - vm_page_free_reserved;
-
- if (npages > vm_pages_available) {
- mutex_unlock(&vm_page_queue_free_lock);
- vm_page_unlock_queues();
- return KERN_RESOURCE_SHORTAGE;
- }
/*
* Obtain a pointer to a subset of the free
* list large enough to satisfy the request;
* the region will be physically contiguous.
*/
- pages = vm_page_find_contiguous(npages);
-
- /* adjust global freelist counts and determine need for wakeups */
- if (vm_page_free_count < vm_page_free_count_minimum)
- vm_page_free_count_minimum = vm_page_free_count;
-
- wakeup = ((vm_page_free_count < vm_page_free_min) ||
- ((vm_page_free_count < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)));
-
- mutex_unlock(&vm_page_queue_free_lock);
+ pages = vm_page_find_contiguous(npages, max_pnum, wire);
- if (pages == VM_PAGE_NULL) {
- vm_page_unlock_queues();
+ if (pages == VM_PAGE_NULL)
return KERN_NO_SPACE;
- }
-
/*
- * Walk the returned list, wiring the pages.
+ * determine need for wakeups
*/
- if (wire == TRUE)
- for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
- /*
- * Essentially inlined vm_page_wire.
- */
- assert(!m->active);
- assert(!m->inactive);
- assert(!m->private);
- assert(!m->fictitious);
- assert(m->wire_count == 0);
- assert(m->gobbled);
- m->gobbled = FALSE;
- m->wire_count++;
- --vm_page_gobble_count;
- }
- vm_page_unlock_queues();
-
- if (wakeup)
+ if ((vm_page_free_count < vm_page_free_min) ||
+ ((vm_page_free_count < vm_page_free_target) &&
+ ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
thread_wakeup((event_t) &vm_page_free_wanted);
+
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ vm_page_purgeable_count ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail <= (kern_memorystatus_level - 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
/*
* The CPM pages should now be available and
* ordered by ascending physical address.
*list = pages;
return KERN_SUCCESS;
}
-
+
#include <mach_vm_debug.h>
#if MACH_VM_DEBUG
hash_info_bucket_t *info,
unsigned int count)
{
- int i;
+ unsigned int i;
if (vm_page_bucket_count < count)
count = vm_page_bucket_count;
*/
void
vm_page_print(
- vm_page_t p)
+ db_addr_t db_addr)
{
- extern db_indent;
+ vm_page_t p;
+
+ p = (vm_page_t) (long) db_addr;
iprintf("page 0x%x\n", p);
printf(", offset=0x%x", p->offset);
printf(", wire_count=%d", p->wire_count);
- iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sdiscard\n",
+ iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
(p->inactive ? "" : "!"),
(p->active ? "" : "!"),
+ (p->throttled ? "" : "!"),
(p->gobbled ? "" : "!"),
(p->laundry ? "" : "!"),
(p->free ? "" : "!"),
(p->reference ? "" : "!"),
- (p->discard_request ? "" : "!"));
+ (p->encrypted ? "" : "!"));
iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
(p->busy ? "" : "!"),
(p->wanted ? "" : "!"),
(p->cleaning ? "" : "!"),
(p->pageout ? "" : "!"),
(p->clustered ? "" : "!"));
- iprintf("%slock_supplied, %soverwriting, %srestart, %sunusual\n",
- (p->lock_supplied ? "" : "!"),
+ iprintf("%soverwriting, %srestart, %sunusual\n",
(p->overwriting ? "" : "!"),
(p->restart ? "" : "!"),
(p->unusual ? "" : "!"));
iprintf("phys_page=0x%x", p->phys_page);
- printf(", page_error=0x%x", p->page_error);
- printf(", page_lock=0x%x", p->page_lock);
- printf(", unlock_request=%d\n", p->unlock_request);
db_indent -= 2;
}