X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0b4e3aa066abc0728aacb4bbeb86f53f9737156e..89b3af67bb32e691275bf6fa803d1834b2284115:/osfmk/vm/vm_resident.c diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 7fd4fc0e6..91b359f80 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -56,6 +62,9 @@ * Resident memory management module. */ +#include + +#include #include #include #include @@ -73,13 +82,17 @@ #include #include #include +#include /* (BRINGUP) */ +#include /* (BRINGUP) */ + +#include /* Variables used to indicate the relative age of pages in the * inactive list */ -int vm_page_ticket_roll = 0; -int vm_page_ticket = 0; +unsigned int vm_page_ticket_roll = 0; +unsigned int vm_page_ticket = 0; /* * Associated with page of user-allocatable memory is a * page structure. @@ -115,8 +128,13 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */ unsigned int vm_page_bucket_count = 0; /* How big is array? */ unsigned int vm_page_hash_mask; /* Mask for hash function */ unsigned int vm_page_hash_shift; /* Shift for hash function */ +uint32_t vm_page_bucket_hash; /* Basic bucket hash */ decl_simple_lock_data(,vm_page_bucket_lock) +vm_page_t +vm_page_lookup_nohint(vm_object_t object, vm_object_offset_t offset); + + #if MACH_PAGE_HASH_STATS /* This routine is only for debug. 
It is intended to be called by * hand by a developer using a kernel debugger. This routine prints @@ -163,11 +181,9 @@ hash_debug(void) * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT * constants. */ -#ifndef PAGE_SIZE_FIXED -vm_size_t page_size = 4096; -vm_size_t page_mask = 4095; -int page_shift = 12; -#endif /* PAGE_SIZE_FIXED */ +vm_size_t page_size = PAGE_SIZE; +vm_size_t page_mask = PAGE_MASK; +int page_shift = PAGE_SHIFT; /* * Resident page structures are initialized from @@ -185,10 +201,9 @@ struct vm_page vm_page_template; */ vm_page_t vm_page_queue_free; vm_page_t vm_page_queue_fictitious; -decl_mutex_data(,vm_page_queue_free_lock) unsigned int vm_page_free_wanted; -int vm_page_free_count; -int vm_page_fictitious_count; +unsigned int vm_page_free_count; +unsigned int vm_page_fictitious_count; unsigned int vm_page_free_count_minimum; /* debugging */ @@ -203,10 +218,11 @@ unsigned int vm_page_free_count_minimum; /* debugging */ */ zone_t vm_page_zone; decl_mutex_data(,vm_page_alloc_lock) +unsigned int io_throttle_zero_fill; /* * Fictitious pages don't have a physical address, - * but we must initialize phys_addr to something. + * but we must initialize phys_page to something. * For debugging, this should be a strange value * that the pmap module can recognize in assertions. */ @@ -217,25 +233,28 @@ vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1; * queues that are used by the page replacement * system (pageout daemon). These queues are * defined here, but are shared by the pageout - * module. + * module. The inactive queue is broken into + * inactive and zf for convenience as the + * pageout daemon often assignes a higher + * affinity to zf pages */ queue_head_t vm_page_queue_active; queue_head_t vm_page_queue_inactive; -decl_mutex_data(,vm_page_queue_lock) -int vm_page_active_count; -int vm_page_inactive_count; -int vm_page_wire_count; -int vm_page_gobble_count = 0; -int vm_page_wire_count_warning = 0; -int vm_page_gobble_count_warning = 0; - -/* the following fields are protected by the vm_page_queue_lock */ -queue_head_t vm_page_queue_limbo; -int vm_page_limbo_count = 0; /* total pages in limbo */ -int vm_page_limbo_real_count = 0; /* real pages in limbo */ -int vm_page_pin_count = 0; /* number of pinned pages */ - -decl_simple_lock_data(,vm_page_preppin_lock) +unsigned int vm_page_active_count; +unsigned int vm_page_inactive_count; +unsigned int vm_page_wire_count; +unsigned int vm_page_gobble_count = 0; +unsigned int vm_page_wire_count_warning = 0; +unsigned int vm_page_gobble_count_warning = 0; + +unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ +uint64_t vm_page_purged_count = 0; /* total count of purged pages */ + +ppnum_t vm_lopage_poolstart = 0; +ppnum_t vm_lopage_poolend = 0; +int vm_lopage_poolsize = 0; +uint64_t max_valid_dma_address = 0xffffffffffffffffULL; + /* * Several page replacement parameters are also @@ -243,11 +262,11 @@ decl_simple_lock_data(,vm_page_preppin_lock) * (done here in vm_page_alloc) can trigger the * pageout daemon. 
*/ -int vm_page_free_target = 0; -int vm_page_free_min = 0; -int vm_page_inactive_target = 0; -int vm_page_free_reserved = 0; -int vm_page_laundry_count = 0; +unsigned int vm_page_free_target = 0; +unsigned int vm_page_free_min = 0; +unsigned int vm_page_inactive_target = 0; +unsigned int vm_page_free_reserved = 0; +unsigned int vm_page_throttled_count = 0; /* * The VM system has a couple of heuristics for deciding @@ -271,16 +290,14 @@ boolean_t vm_page_deactivate_hint = TRUE; void vm_set_page_size(void) { -#ifndef PAGE_SIZE_FIXED page_mask = page_size - 1; if ((page_mask & page_size) != 0) panic("vm_set_page_size: page size not a power of two"); for (page_shift = 0; ; page_shift++) - if ((1 << page_shift) == page_size) + if ((1U << page_shift) == page_size) break; -#endif /* PAGE_SIZE_FIXED */ } /* @@ -300,7 +317,7 @@ vm_page_bootstrap( vm_offset_t *endp) { register vm_page_t m; - int i; + unsigned int i; unsigned int log1; unsigned int log2; unsigned int size; @@ -310,14 +327,20 @@ vm_page_bootstrap( */ m = &vm_page_template; - m->object = VM_OBJECT_NULL; /* reset later */ - m->offset = 0; /* reset later */ + m->object = VM_OBJECT_NULL; /* reset later */ + m->offset = (vm_object_offset_t) -1; /* reset later */ m->wire_count = 0; + m->pageq.next = NULL; + m->pageq.prev = NULL; + m->listq.next = NULL; + m->listq.prev = NULL; + m->inactive = FALSE; m->active = FALSE; m->laundry = FALSE; m->free = FALSE; + m->no_isync = TRUE; m->reference = FALSE; m->pageout = FALSE; m->dump_cleaning = FALSE; @@ -337,8 +360,10 @@ vm_page_bootstrap( m->lock_supplied = FALSE; m->unusual = FALSE; m->restart = FALSE; + m->zero_fill = FALSE; + m->encrypted = FALSE; - m->phys_addr = 0; /* reset later */ + m->phys_page = 0; /* reset later */ m->page_lock = VM_PROT_NONE; m->unlock_request = VM_PROT_NONE; @@ -348,15 +373,14 @@ vm_page_bootstrap( * Initialize the page queues. */ - mutex_init(&vm_page_queue_free_lock, ETAP_VM_PAGEQ_FREE); - mutex_init(&vm_page_queue_lock, ETAP_VM_PAGEQ); - simple_lock_init(&vm_page_preppin_lock, ETAP_VM_PREPPIN); + mutex_init(&vm_page_queue_free_lock, 0); + mutex_init(&vm_page_queue_lock, 0); vm_page_queue_free = VM_PAGE_NULL; vm_page_queue_fictitious = VM_PAGE_NULL; queue_init(&vm_page_queue_active); queue_init(&vm_page_queue_inactive); - queue_init(&vm_page_queue_limbo); + queue_init(&vm_page_queue_zf); vm_page_free_wanted = 0; @@ -377,7 +401,7 @@ vm_page_bootstrap( * than the number of physical pages in the system. */ - simple_lock_init(&vm_page_bucket_lock, ETAP_VM_BUCKET); + simple_lock_init(&vm_page_bucket_lock, 0); if (vm_page_bucket_count == 0) { unsigned int npages = pmap_free_pages(); @@ -403,6 +427,10 @@ vm_page_bootstrap( for (log2 = 0; size > 1; log2++) size /= 2; vm_page_hash_shift = log1/2 - log2 + 1; + + vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */ + vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */ + vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */ if (vm_page_hash_mask & vm_page_bucket_count) printf("vm_page_bootstrap: WARNING -- strange page hash\n"); @@ -443,10 +471,12 @@ vm_page_bootstrap( * wired, they nonetheless can't be moved. At this moment, * all VM managed pages are "free", courtesy of pmap_startup. 
 */
-	vm_page_wire_count = atop(mem_size) - vm_page_free_count;	/* initial value */
+	vm_page_wire_count = atop_64(max_mem) - vm_page_free_count;	/* initial value */
 	printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
 	vm_page_free_count_minimum = vm_page_free_count;
+
+	simple_lock_init(&vm_paging_lock, 0);
 }
 
 #ifndef MACHINE_PAGES
@@ -455,11 +485,12 @@ vm_page_bootstrap(
  *	of two simpler functions, pmap_virtual_space and pmap_next_page.
  */
 
-vm_offset_t
+void *
 pmap_steal_memory(
 	vm_size_t size)
 {
-	vm_offset_t addr, vaddr, paddr;
+	vm_offset_t addr, vaddr;
+	ppnum_t	phys_page;
 
 	/*
 	 *	We round the size to a round multiple.
 	 */
@@ -500,7 +531,7 @@ pmap_steal_memory(
 	for (vaddr = round_page(addr); vaddr < addr + size; vaddr += PAGE_SIZE) {
-		if (!pmap_next_page(&paddr))
+		if (!pmap_next_page(&phys_page))
 			panic("pmap_steal_memory");
 
 		/*
 		 *	but some pmap modules barf if they are.
 		 */
 
-		pmap_enter(kernel_pmap, vaddr, paddr,
-			   VM_PROT_READ|VM_PROT_WRITE, FALSE);
+		pmap_enter(kernel_pmap, vaddr, phys_page,
+			   VM_PROT_READ|VM_PROT_WRITE,
+				VM_WIMG_USE_DEFAULT, FALSE);
 		/*
 		 * Account for newly stolen memory
 		 */
@@ -517,7 +549,7 @@
 	}
-	return addr;
+	return (void *) addr;
 }
 
 void
@@ -525,51 +557,140 @@ pmap_startup(
 	vm_offset_t *startp,
 	vm_offset_t *endp)
 {
-	unsigned int i, npages, pages_initialized;
-	vm_page_t pages;
-	vm_offset_t paddr;
+	unsigned int i, npages, pages_initialized, fill, fillval;
+	vm_page_t pages;
+	ppnum_t phys_page;
+	addr64_t tmpaddr;
+	unsigned int num_of_lopages = 0;
+	unsigned int last_index;
 
 	/*
 	 *	We calculate how many page frames we will have
 	 *	and then allocate the page structures in one chunk.
 	 */
-	npages = ((PAGE_SIZE * pmap_free_pages() +
-		   (round_page(virtual_space_start) - virtual_space_start)) /
-		  (PAGE_SIZE + sizeof *pages));
+	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
+	tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start);	/* Account for any slop */
+	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
 
 	pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
 
 	/*
 	 *	Initialize the page frames.
 	 */
-
 	for (i = 0, pages_initialized = 0; i < npages; i++) {
-		if (!pmap_next_page(&paddr))
+		if (!pmap_next_page(&phys_page))
 			break;
-		vm_page_init(&pages[i], paddr);
+		vm_page_init(&pages[i], phys_page);
 		vm_page_pages++;
 		pages_initialized++;
 	}
+	/*
+	 * Check if we want to initialize pages to a known value
+	 */
+	fill = 0;						/* Assume no fill */
+	if (PE_parse_boot_arg("fill", &fillval)) fill = 1;	/* Set fill */
+
+	/*
+	 * if vm_lopage_poolsize is non-zero, then we need to reserve
+	 * a pool of pages whose addresses are less than 4G... this pool
+	 * is used by drivers whose hardware can't DMA beyond 32 bits...
+	 *
+	 * note that I'm assuming that the page list is ascending and
+	 * ordered with respect to the physical address
+	 */
+	for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
+		vm_page_t m;
+
+		m = &pages[i];
+
+		if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
+			panic("couldn't reserve the lopage pool: not enough lo pages\n");
+
+		if (m->phys_page < vm_lopage_poolend)
+			panic("couldn't reserve the lopage pool: page list out of order\n");
+
+		vm_lopage_poolend = m->phys_page;
+
+		if (vm_lopage_poolstart == 0)
+			vm_lopage_poolstart = m->phys_page;
+		else {
+			if (m->phys_page < vm_lopage_poolstart)
+				panic("couldn't reserve the lopage pool: page list out of order\n");
+		}
+
+		if (fill)
+			fillPage(m->phys_page, fillval);	/* Fill the page with a known value if requested at boot */
+
+		vm_page_release(m);
+	}
+	last_index = i;
+
+	// -debug code remove
+	if (2 == vm_himemory_mode) {
+		// free low -> high so high is preferred
+		for (i = last_index + 1; i <= pages_initialized; i++) {
+			if(fill) fillPage(pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
+			vm_page_release(&pages[i - 1]);
+		}
+	}
+	else
+	// debug code remove-
+
 	/*
 	 *	Release pages in reverse order so that physical pages
 	 *	initially get allocated in ascending addresses. This keeps
 	 *	the devices (which must address physical memory) happy if
 	 *	they require several consecutive pages.
 	 */
-
-	for (i = pages_initialized; i > 0; i--) {
+	for (i = pages_initialized; i > last_index; i--) {
+		if(fill) fillPage(pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
 		vm_page_release(&pages[i - 1]);
 	}
+#if 0
+	{
+		vm_page_t xx, xxo, xxl;
+		int j, k, l;
+
+		j = 0;						/* (BRINGUP) */
+		xxl = 0;
+
+		for(xx = vm_page_queue_free; xx; xxl = xx, xx = xx->pageq.next) {	/* (BRINGUP) */
+			j++;					/* (BRINGUP) */
+			if(j > vm_page_free_count) {		/* (BRINGUP) */
+				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
+			}
+
+			l = vm_page_free_count - j;		/* (BRINGUP) */
+			k = 0;					/* (BRINGUP) */
+
+			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
+
+			for(xxo = xx->pageq.next; xxo; xxo = xxo->pageq.next) {	/* (BRINGUP) */
+				k++;
+				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
+				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
+					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
+				}
+			}
+		}
+
+		if(j != vm_page_free_count) {			/* (BRINGUP) */
+			panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
+		}
+	}
+#endif
+
+
 	/*
 	 *	We have to re-align virtual_space_start,
 	 *	because pmap_steal_memory has been using it.
*/ - virtual_space_start = round_page(virtual_space_start); + virtual_space_start = round_page_32(virtual_space_start); *startp = virtual_space_start; *endp = virtual_space_end; @@ -603,7 +724,7 @@ vm_page_module_init(void) vm_page_zone->count += vm_page_pages; vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size; - mutex_init(&vm_page_alloc_lock, ETAP_VM_PAGE_ALLOC); + mutex_init(&vm_page_alloc_lock, 0); } /* @@ -617,20 +738,20 @@ vm_page_module_init(void) void vm_page_create( - vm_offset_t start, - vm_offset_t end) + ppnum_t start, + ppnum_t end) { - vm_offset_t paddr; - vm_page_t m; + ppnum_t phys_page; + vm_page_t m; - for (paddr = round_page(start); - paddr < trunc_page(end); - paddr += PAGE_SIZE) { + for (phys_page = start; + phys_page < end; + phys_page++) { while ((m = (vm_page_t) vm_page_grab_fictitious()) == VM_PAGE_NULL) vm_page_more_fictitious(); - vm_page_init(m, paddr); + vm_page_init(m, phys_page); vm_page_pages++; vm_page_release(m); } @@ -641,11 +762,10 @@ vm_page_create( * * Distributes the object/offset key pair among hash buckets. * - * NOTE: To get a good hash function, the bucket count should - * be a power of two. + * NOTE: The bucket count must be a power of 2 */ #define vm_page_hash(object, offset) (\ - ( ((natural_t)(vm_offset_t)object<Lock, MA_OWNED); - if (mem->tabled) - panic("vm_page_insert"); - + if (mem->tabled || mem->object != VM_OBJECT_NULL) + panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) " + "already in (obj=%p,off=0x%llx)", + mem, object, offset, mem->object, mem->offset); +#endif assert(!object->internal || offset < object->size); /* only insert "pageout" pages into "pageout" objects, * and normal pages into normal objects */ assert(object->pageout == mem->pageout); + assert(vm_page_lookup(object, offset) == VM_PAGE_NULL); + /* * Record the object/offset pair in this page */ @@ -705,7 +831,7 @@ vm_page_insert( * Now link into the object's list of backed pages. */ - queue_enter(&object->memq, mem, vm_page_t, listq); + VM_PAGE_INSERT(mem, object); mem->tabled = TRUE; /* @@ -713,6 +839,13 @@ vm_page_insert( */ object->resident_page_count++; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + vm_page_lock_queues(); + vm_page_purgeable_count++; + vm_page_unlock_queues(); + } } /* @@ -730,13 +863,19 @@ vm_page_replace( register vm_object_t object, register vm_object_offset_t offset) { - register vm_page_bucket_t *bucket; + vm_page_bucket_t *bucket; + vm_page_t found_m = VM_PAGE_NULL; VM_PAGE_CHECK(mem); - - if (mem->tabled) - panic("vm_page_replace"); - +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + + if (mem->tabled || mem->object != VM_OBJECT_NULL) + panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) " + "already in (obj=%p,off=0x%llx)", + mem, object, offset, mem->object, mem->offset); +#endif /* * Record the object/offset pair in this page */ @@ -751,44 +890,65 @@ vm_page_replace( bucket = &vm_page_buckets[vm_page_hash(object, offset)]; simple_lock(&vm_page_bucket_lock); + if (bucket->pages) { vm_page_t *mp = &bucket->pages; register vm_page_t m = *mp; + do { if (m->object == object && m->offset == offset) { /* - * Remove page from bucket and from object, - * and return it to the free list. + * Remove old page from hash list */ *mp = m->next; - queue_remove(&object->memq, m, vm_page_t, - listq); - m->tabled = FALSE; - object->resident_page_count--; - - /* - * Return page to the free list. 
- * Note the page is not tabled now, so this - * won't self-deadlock on the bucket lock. - */ - vm_page_free(m); + found_m = m; break; } mp = &m->next; - } while (m = *mp); + } while ((m = *mp)); + mem->next = bucket->pages; } else { mem->next = VM_PAGE_NULL; } + /* + * insert new page at head of hash list + */ bucket->pages = mem; + simple_unlock(&vm_page_bucket_lock); + if (found_m) { + /* + * there was already a page at the specified + * offset for this object... remove it from + * the object and free it back to the free list + */ + VM_PAGE_REMOVE(found_m); + found_m->tabled = FALSE; + + found_m->object = VM_OBJECT_NULL; + found_m->offset = (vm_object_offset_t) -1; + object->resident_page_count--; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + assert(vm_page_purgeable_count > 0); + vm_page_purgeable_count--; + } + + /* + * Return page to the free list. + * Note the page is not tabled now + */ + vm_page_free(found_m); + } /* * Now link into the object's list of backed pages. */ - queue_enter(&object->memq, mem, vm_page_t, listq); + VM_PAGE_INSERT(mem, object); mem->tabled = TRUE; /* @@ -797,6 +957,11 @@ vm_page_replace( */ object->resident_page_count++; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count++; + } } /* @@ -805,7 +970,7 @@ vm_page_replace( * Removes the given mem entry from the object/offset-page * table and the object page list. * - * The object and page must be locked. + * The object and page queues must be locked. */ void @@ -819,11 +984,15 @@ vm_page_remove( "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n", (integer_t)mem->object, (integer_t)mem->offset, (integer_t)mem, 0,0); - +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + _mutex_assert(&mem->object->Lock, MA_OWNED); +#endif assert(mem->tabled); assert(!mem->cleaning); VM_PAGE_CHECK(mem); + /* * Remove from the object_object/offset hash table */ @@ -852,7 +1021,7 @@ vm_page_remove( * Now remove from the object's list of backed pages. */ - queue_remove(&mem->object->memq, mem, vm_page_t, listq); + VM_PAGE_REMOVE(mem); /* * And show that the object has one fewer resident @@ -861,9 +1030,15 @@ vm_page_remove( mem->object->resident_page_count--; + if (mem->object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + mem->object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + assert(vm_page_purgeable_count > 0); + vm_page_purgeable_count--; + } + mem->tabled = FALSE; mem->object = VM_OBJECT_NULL; - mem->offset = 0; + mem->offset = (vm_object_offset_t) -1; } /* @@ -875,14 +1050,105 @@ vm_page_remove( * The object must be locked. No side effects. */ +unsigned long vm_page_lookup_hint = 0; +unsigned long vm_page_lookup_hint_next = 0; +unsigned long vm_page_lookup_hint_prev = 0; +unsigned long vm_page_lookup_hint_miss = 0; + vm_page_t vm_page_lookup( register vm_object_t object, register vm_object_offset_t offset) +{ + register vm_page_t mem; + register vm_page_bucket_t *bucket; + queue_entry_t qe; +#if 0 + _mutex_assert(&object->Lock, MA_OWNED); +#endif + + mem = object->memq_hint; + if (mem != VM_PAGE_NULL) { + assert(mem->object == object); + if (mem->offset == offset) { + vm_page_lookup_hint++; + return mem; + } + qe = queue_next(&mem->listq); + if (! 
queue_end(&object->memq, qe)) {
+		vm_page_t next_page;
+
+		next_page = (vm_page_t) qe;
+		assert(next_page->object == object);
+		if (next_page->offset == offset) {
+			vm_page_lookup_hint_next++;
+			object->memq_hint = next_page; /* new hint */
+			return next_page;
+		}
+	}
+	qe = queue_prev(&mem->listq);
+	if (! queue_end(&object->memq, qe)) {
+		vm_page_t prev_page;
+
+		prev_page = (vm_page_t) qe;
+		assert(prev_page->object == object);
+		if (prev_page->offset == offset) {
+			vm_page_lookup_hint_prev++;
+			object->memq_hint = prev_page; /* new hint */
+			return prev_page;
+		}
+	}
+	}
+
+	/*
+	 * Search the hash table for this object/offset pair
+	 */
+
+	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+
+	/*
+	 * since we hold the object lock, we are guaranteed that no
+	 * new pages can be inserted into this object... this in turn
+	 * guarantees that the page we're looking for can't exist
+	 * if the bucket it hashes to is currently NULL even when looked
+	 * at outside the scope of the hash bucket lock... this is a
+	 * really cheap optimization to avoid taking the lock
+	 */
+	if (bucket->pages == VM_PAGE_NULL) {
+	        return (VM_PAGE_NULL);
+	}
+	simple_lock(&vm_page_bucket_lock);
+
+	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
+		VM_PAGE_CHECK(mem);
+		if ((mem->object == object) && (mem->offset == offset))
+			break;
+	}
+	simple_unlock(&vm_page_bucket_lock);
+
+	if (mem != VM_PAGE_NULL) {
+		if (object->memq_hint != VM_PAGE_NULL) {
+			vm_page_lookup_hint_miss++;
+		}
+		assert(mem->object == object);
+		object->memq_hint = mem;
+	}
+
+	return(mem);
+}
+
+
+vm_page_t
+vm_page_lookup_nohint(
+	vm_object_t		object,
+	vm_object_offset_t	offset)
 {
 	register vm_page_t	mem;
 	register vm_page_bucket_t *bucket;
+#if 0
+	_mutex_assert(&object->Lock, MA_OWNED);
+#endif
 	/*
 	 *	Search the hash table for this object/offset pair
 	 */
@@ -896,6 +1162,7 @@ vm_page_lookup(
 			break;
 	}
 	simple_unlock(&vm_page_bucket_lock);
+
 	return(mem);
 }
 
@@ -914,6 +1181,16 @@ vm_page_rename(
 	vm_object_offset_t		new_offset)
 {
 	assert(mem->object != new_object);
+	/*
+	 * ENCRYPTED SWAP:
+	 * The encryption key is based on the page's memory object
+	 * (aka "pager") and paging offset. Moving the page to
+	 * another VM object changes its "pager" and "paging_offset"
+	 * so it has to be decrypted first.
+	 */
+	if (mem->encrypted) {
+		panic("vm_page_rename: page %p is encrypted\n", mem);
+	}
 	/*
 	 *	Changes to mem->object require the page lock because
 	 *	the pageout daemon uses that lock to get the object.
@@ -940,10 +1217,11 @@ vm_page_rename( void vm_page_init( vm_page_t mem, - vm_offset_t phys_addr) + ppnum_t phys_page) { + assert(phys_page); *mem = vm_page_template; - mem->phys_addr = phys_addr; + mem->phys_page = phys_page; } /* @@ -963,7 +1241,6 @@ vm_page_grab_fictitious(void) m = (vm_page_t)zget(vm_page_zone); if (m) { - m->free = FALSE; vm_page_init(m, vm_page_fictitious_addr); m->fictitious = TRUE; } @@ -985,14 +1262,15 @@ vm_page_release_fictitious( assert(!m->free); assert(m->busy); assert(m->fictitious); - assert(m->phys_addr == vm_page_fictitious_addr); + assert(m->phys_page == vm_page_fictitious_addr); c_vm_page_release_fictitious++; - +#if DEBUG if (m->free) panic("vm_page_release_fictitious"); +#endif m->free = TRUE; - zfree(vm_page_zone, (vm_offset_t)m); + zfree(vm_page_zone, m); } /* @@ -1025,7 +1303,6 @@ vm_page_release_fictitious( void vm_page_more_fictitious(void) { - extern vm_map_t zone_map; register vm_page_t m; vm_offset_t addr; kern_return_t retval; @@ -1063,9 +1340,10 @@ void vm_page_more_fictitious(void) return; } - if ((retval = kernel_memory_allocate(zone_map, - &addr, PAGE_SIZE, VM_PROT_ALL, - KMA_KOBJECT|KMA_NOPAGEWAIT)) != KERN_SUCCESS) { + retval = kernel_memory_allocate(zone_map, + &addr, PAGE_SIZE, VM_PROT_ALL, + KMA_KOBJECT|KMA_NOPAGEWAIT); + if (retval != KERN_SUCCESS) { /* * No page was available. Tell the pageout daemon, drop the * lock to give another thread a chance at it, and @@ -1086,7 +1364,7 @@ void vm_page_more_fictitious(void) m->fictitious = TRUE; m++; } - zcram(vm_page_zone, addr, PAGE_SIZE); + zcram(vm_page_zone, (void *) addr, PAGE_SIZE); mutex_unlock(&vm_page_alloc_lock); } @@ -1110,19 +1388,18 @@ vm_page_convert( if (real_m == VM_PAGE_NULL) return FALSE; - m->phys_addr = real_m->phys_addr; + m->phys_page = real_m->phys_page; m->fictitious = FALSE; + m->no_isync = TRUE; vm_page_lock_queues(); - m->no_isync = TRUE; - real_m->no_isync = FALSE; if (m->active) vm_page_active_count++; else if (m->inactive) vm_page_inactive_count++; vm_page_unlock_queues(); - real_m->phys_addr = vm_page_fictitious_addr; + real_m->phys_page = vm_page_fictitious_addr; real_m->fictitious = TRUE; vm_page_release_fictitious(real_m); @@ -1143,6 +1420,55 @@ vm_pool_low(void) return( vm_page_free_count < vm_page_free_reserved ); } + + +/* + * this is an interface to support bring-up of drivers + * on platforms with physical memory > 4G... + */ +int vm_himemory_mode = 0; + + +/* + * this interface exists to support hardware controllers + * incapable of generating DMAs with more than 32 bits + * of address on platforms with physical memory > 4G... 
+ */ +unsigned int vm_lopage_free_count = 0; +unsigned int vm_lopage_max_count = 0; +vm_page_t vm_lopage_queue_free = VM_PAGE_NULL; + +vm_page_t +vm_page_grablo(void) +{ + register vm_page_t mem; + unsigned int vm_lopage_alloc_count; + + if (vm_lopage_poolsize == 0) + return (vm_page_grab()); + + mutex_lock(&vm_page_queue_free_lock); + + if ((mem = vm_lopage_queue_free) != VM_PAGE_NULL) { + + vm_lopage_queue_free = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + mem->pageq.prev = NULL; + mem->free = FALSE; + mem->no_isync = TRUE; + + vm_lopage_free_count--; + vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count; + if (vm_lopage_alloc_count > vm_lopage_max_count) + vm_lopage_max_count = vm_lopage_alloc_count; + } + mutex_unlock(&vm_page_queue_free_lock); + + return (mem); +} + + + /* * vm_page_grab: * @@ -1183,14 +1509,13 @@ vm_page_grab(void) */ if ((vm_page_free_count < vm_page_free_reserved) && - !current_thread()->vm_privilege) { + !(current_thread()->options & TH_OPT_VMPRIV)) { mutex_unlock(&vm_page_queue_free_lock); mem = VM_PAGE_NULL; goto wakeup_pageout; } while (vm_page_queue_free == VM_PAGE_NULL) { - printf("vm_page_grab: no free pages, trouble expected...\n"); mutex_unlock(&vm_page_queue_free_lock); VM_PAGE_WAIT(); mutex_lock(&vm_page_queue_free_lock); @@ -1200,10 +1525,18 @@ vm_page_grab(void) vm_page_free_count_minimum = vm_page_free_count; mem = vm_page_queue_free; vm_page_queue_free = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + mem->pageq.prev = NULL; + assert(mem->listq.next == NULL && mem->listq.prev == NULL); + assert(mem->tabled == FALSE); + assert(mem->object == VM_OBJECT_NULL); + assert(!mem->laundry); mem->free = FALSE; mem->no_isync = TRUE; mutex_unlock(&vm_page_queue_free_lock); + assert(pmap_verify_free(mem->phys_page)); + /* * Decide if we should poke the pageout daemon. * We do this if the free count is less than the low @@ -1221,7 +1554,7 @@ wakeup_pageout: (vm_page_inactive_count < vm_page_inactive_target))) thread_wakeup((event_t) &vm_page_free_wanted); -// dbgLog(mem->phys_addr, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */ +// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */ return mem; } @@ -1236,44 +1569,71 @@ void vm_page_release( register vm_page_t mem) { + +#if 0 + unsigned int pindex; + phys_entry *physent; + + physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */ + if(physent->ppLink & ppN) { /* (BRINGUP) */ + panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page); + } + physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */ +#endif assert(!mem->private && !mem->fictitious); -// dbgLog(mem->phys_addr, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */ +// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */ mutex_lock(&vm_page_queue_free_lock); +#if DEBUG if (mem->free) panic("vm_page_release"); +#endif mem->free = TRUE; - mem->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = mem; - vm_page_free_count++; - - /* - * Check if we should wake up someone waiting for page. - * But don't bother waking them unless they can allocate. - * - * We wakeup only one thread, to prevent starvation. - * Because the scheduling system handles wait queues FIFO, - * if we wakeup all waiting threads, one greedy thread - * can starve multiple niceguy threads. 
When the threads - * all wakeup, the greedy threads runs first, grabs the page, - * and waits for another page. It will be the first to run - * when the next page is freed. - * - * However, there is a slight danger here. - * The thread we wake might not use the free page. - * Then the other threads could wait indefinitely - * while the page goes unused. To forestall this, - * the pageout daemon will keep making free pages - * as long as vm_page_free_wanted is non-zero. - */ + assert(!mem->laundry); + assert(mem->object == VM_OBJECT_NULL); + assert(mem->pageq.next == NULL && + mem->pageq.prev == NULL); + + if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { + /* + * this exists to support hardware controllers + * incapable of generating DMAs with more than 32 bits + * of address on platforms with physical memory > 4G... + */ + mem->pageq.next = (queue_entry_t) vm_lopage_queue_free; + vm_lopage_queue_free = mem; + vm_lopage_free_count++; + } else { + mem->pageq.next = (queue_entry_t) vm_page_queue_free; + vm_page_queue_free = mem; + vm_page_free_count++; + /* + * Check if we should wake up someone waiting for page. + * But don't bother waking them unless they can allocate. + * + * We wakeup only one thread, to prevent starvation. + * Because the scheduling system handles wait queues FIFO, + * if we wakeup all waiting threads, one greedy thread + * can starve multiple niceguy threads. When the threads + * all wakeup, the greedy threads runs first, grabs the page, + * and waits for another page. It will be the first to run + * when the next page is freed. + * + * However, there is a slight danger here. + * The thread we wake might not use the free page. + * Then the other threads could wait indefinitely + * while the page goes unused. To forestall this, + * the pageout daemon will keep making free pages + * as long as vm_page_free_wanted is non-zero. + */ - if ((vm_page_free_wanted > 0) && - (vm_page_free_count >= vm_page_free_reserved)) { - vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_count); + if ((vm_page_free_wanted > 0) && + (vm_page_free_count >= vm_page_free_reserved)) { + vm_page_free_wanted--; + thread_wakeup_one((event_t) &vm_page_free_count); + } } - mutex_unlock(&vm_page_queue_free_lock); } @@ -1299,20 +1659,22 @@ vm_page_wait( * succeeds, the second fails. After the first page is freed, * a call to vm_page_wait must really block. 
*/ - kern_return_t wait_result; - int need_wakeup = 0; + kern_return_t wait_result; + int need_wakeup = 0; mutex_lock(&vm_page_queue_free_lock); if (vm_page_free_count < vm_page_free_target) { if (vm_page_free_wanted++ == 0) need_wakeup = 1; - assert_wait((event_t)&vm_page_free_count, interruptible); + wait_result = assert_wait((event_t)&vm_page_free_count, interruptible); mutex_unlock(&vm_page_queue_free_lock); counter(c_vm_page_wait_block++); if (need_wakeup) thread_wakeup((event_t)&vm_page_free_wanted); - wait_result = thread_block((void (*)(void))0); + + if (wait_result == THREAD_WAITING) + wait_result = thread_block(THREAD_CONTINUE_NULL); return(wait_result == THREAD_AWAKENED); } else { @@ -1337,6 +1699,9 @@ vm_page_alloc( { register vm_page_t mem; +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); +#endif mem = vm_page_grab(); if (mem == VM_PAGE_NULL) return VM_PAGE_NULL; @@ -1346,10 +1711,31 @@ vm_page_alloc( return(mem); } + +vm_page_t +vm_page_alloclo( + vm_object_t object, + vm_object_offset_t offset) +{ + register vm_page_t mem; + +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); +#endif + mem = vm_page_grablo(); + if (mem == VM_PAGE_NULL) + return VM_PAGE_NULL; + + vm_page_insert(mem, object, offset); + + return(mem); +} + + counter(unsigned int c_laundry_pages_freed = 0;) int vm_pagein_cluster_unused = 0; -boolean_t vm_page_free_verify = FALSE; +boolean_t vm_page_free_verify = TRUE; /* * vm_page_free: * @@ -1367,8 +1753,18 @@ vm_page_free( assert(!mem->free); assert(!mem->cleaning); assert(!mem->pageout); - assert(!vm_page_free_verify || pmap_verify_free(mem->phys_addr)); + if (vm_page_free_verify && !mem->fictitious && !mem->private) { + assert(pmap_verify_free(mem->phys_page)); + } +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + + if (mem->free) + panic("vm_page_free: freeing page on free list\n"); +#endif if (mem->tabled) vm_page_remove(mem); /* clears tabled, object, offset */ VM_PAGE_QUEUES_REMOVE(mem); /* clears active or inactive */ @@ -1391,18 +1787,10 @@ vm_page_free( mem->gobbled = FALSE; if (mem->laundry) { - extern int vm_page_laundry_min; - vm_page_laundry_count--; - mem->laundry = FALSE; /* laundry is now clear */ + vm_pageout_throttle_up(mem); counter(++c_laundry_pages_freed); - if (vm_page_laundry_count < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) &vm_page_laundry_count); - } } - mem->discard_request = FALSE; - PAGE_WAKEUP(mem); /* clears wanted */ if (mem->absent) @@ -1417,22 +1805,121 @@ vm_page_free( mem->dirty = FALSE; mem->precious = FALSE; mem->reference = FALSE; + mem->encrypted = FALSE; mem->page_error = KERN_SUCCESS; if (mem->private) { mem->private = FALSE; mem->fictitious = TRUE; - mem->phys_addr = vm_page_fictitious_addr; + mem->phys_page = vm_page_fictitious_addr; } if (mem->fictitious) { vm_page_release_fictitious(mem); } else { - vm_page_init(mem, mem->phys_addr); + /* depends on the queues lock */ + if(mem->zero_fill) { + vm_zf_count-=1; + mem->zero_fill = FALSE; + } + vm_page_init(mem, mem->phys_page); vm_page_release(mem); } } + +void +vm_page_free_list( + register vm_page_t mem) +{ + register vm_page_t nxt; + register vm_page_t first = NULL; + register vm_page_t last = VM_PAGE_NULL; + register int pg_count = 0; + +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif + while (mem) { +#if DEBUG + if (mem->tabled || mem->object) + panic("vm_page_free_list: freeing tabled page\n"); + if (mem->inactive || mem->active || 
mem->free) + panic("vm_page_free_list: freeing page on list\n"); +#endif + assert(mem->pageq.prev == NULL); + nxt = (vm_page_t)(mem->pageq.next); + + if (mem->clustered) + vm_pagein_cluster_unused++; + + if (mem->laundry) { + vm_pageout_throttle_up(mem); + counter(++c_laundry_pages_freed); + } + mem->busy = TRUE; + + PAGE_WAKEUP(mem); /* clears wanted */ + + if (mem->private) + mem->fictitious = TRUE; + + if (!mem->fictitious) { + /* depends on the queues lock */ + if (mem->zero_fill) + vm_zf_count -= 1; + assert(!mem->laundry); + vm_page_init(mem, mem->phys_page); + + mem->free = TRUE; + + if (first == NULL) + last = mem; + mem->pageq.next = (queue_t) first; + first = mem; + + pg_count++; + } else { + mem->phys_page = vm_page_fictitious_addr; + vm_page_release_fictitious(mem); + } + mem = nxt; + } + if (first) { + + mutex_lock(&vm_page_queue_free_lock); + + last->pageq.next = (queue_entry_t) vm_page_queue_free; + vm_page_queue_free = first; + + vm_page_free_count += pg_count; + + if ((vm_page_free_wanted > 0) && + (vm_page_free_count >= vm_page_free_reserved)) { + unsigned int available_pages; + + if (vm_page_free_count >= vm_page_free_reserved) { + available_pages = (vm_page_free_count + - vm_page_free_reserved); + } else { + available_pages = 0; + } + + if (available_pages >= vm_page_free_wanted) { + vm_page_free_wanted = 0; + thread_wakeup((event_t) &vm_page_free_count); + } else { + while (available_pages--) { + vm_page_free_wanted--; + thread_wakeup_one((event_t) &vm_page_free_count); + } + } + } + mutex_unlock(&vm_page_queue_free_lock); + } +} + + /* * vm_page_wire: * @@ -1447,10 +1934,14 @@ vm_page_wire( register vm_page_t mem) { -// dbgLog(current_act(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ +// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); - +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (mem->wire_count == 0) { VM_PAGE_QUEUES_REMOVE(mem); if (!mem->private && !mem->fictitious && !mem->gobbled) @@ -1458,6 +1949,20 @@ vm_page_wire( if (mem->gobbled) vm_page_gobble_count--; mem->gobbled = FALSE; + if(mem->zero_fill) { + /* depends on the queues lock */ + vm_zf_count-=1; + mem->zero_fill = FALSE; + } + /* + * ENCRYPTED SWAP: + * The page could be encrypted, but + * We don't have to decrypt it here + * because we don't guarantee that the + * data is actually valid at this point. + * The page will get decrypted in + * vm_fault_wire() if needed. 
+ */ } assert(!mem->gobbled); mem->wire_count++; @@ -1502,14 +2007,21 @@ vm_page_unwire( register vm_page_t mem) { -// dbgLog(current_act(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ +// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); assert(mem->wire_count > 0); - +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (--mem->wire_count == 0) { assert(!mem->private && !mem->fictitious); vm_page_wire_count--; + assert(!mem->laundry); + assert(mem->object != kernel_object); + assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq); vm_page_active_count++; mem->active = TRUE; @@ -1531,9 +2043,12 @@ vm_page_deactivate( register vm_page_t m) { VM_PAGE_CHECK(m); + assert(m->object != kernel_object); -// dbgLog(m->phys_addr, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */ - +// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */ +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif /* * This page is no longer very interesting. If it was * interesting (active or inactive/referenced), then we @@ -1552,7 +2067,7 @@ vm_page_deactivate( return; if (m->active || (m->inactive && m->reference)) { if (!m->fictitious && !m->absent) - pmap_clear_reference(m->phys_addr); + pmap_clear_reference(m->phys_page); m->reference = FALSE; VM_PAGE_QUEUES_REMOVE(m); } @@ -1568,7 +2083,15 @@ vm_page_deactivate( vm_page_ticket++; } - queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); + assert(!m->laundry); + assert(m->pageq.next == NULL && m->pageq.prev == NULL); + if(m->zero_fill) { + queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq); + } else { + queue_enter(&vm_page_queue_inactive, + m, vm_page_t, pageq); + } + m->inactive = TRUE; if (!m->fictitious) vm_page_inactive_count++; @@ -1588,7 +2111,10 @@ vm_page_activate( register vm_page_t m) { VM_PAGE_CHECK(m); - + assert(m->object != kernel_object); +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (m->gobbled) { assert(m->wire_count == 0); if (!m->private && !m->fictitious) @@ -1600,15 +2126,26 @@ vm_page_activate( return; if (m->inactive) { - queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); + assert(!m->laundry); + if (m->zero_fill) { + queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); + } else { + queue_remove(&vm_page_queue_inactive, + m, vm_page_t, pageq); + } + m->pageq.next = NULL; + m->pageq.prev = NULL; if (!m->fictitious) vm_page_inactive_count--; m->inactive = FALSE; } if (m->wire_count == 0) { +#if DEBUG if (m->active) panic("vm_page_activate: already active"); - +#endif + assert(!m->laundry); + assert(m->pageq.next == NULL && m->pageq.prev == NULL); queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); m->active = TRUE; m->reference = TRUE; @@ -1632,7 +2169,7 @@ vm_page_part_zero_fill( VM_PAGE_CHECK(m); #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED - pmap_zero_part_page(m->phys_addr, m_pa, len); + pmap_zero_part_page(m->phys_page, m_pa, len); #else while (1) { tmp = vm_page_grab(); @@ -1673,7 +2210,8 @@ vm_page_zero_fill( VM_PAGE_CHECK(m); - pmap_zero_page(m->phys_addr); +// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */ + pmap_zero_page(m->phys_page); } /* @@ -1693,14 +2231,18 @@ vm_page_part_copy( VM_PAGE_CHECK(src_m); VM_PAGE_CHECK(dst_m); - pmap_copy_part_page(src_m->phys_addr, src_pa, - dst_m->phys_addr, dst_pa, len); + 
pmap_copy_part_page(src_m->phys_page, src_pa, + dst_m->phys_page, dst_pa, len); } /* * vm_page_copy: * * Copy one page to another + * + * ENCRYPTED SWAP: + * The source page should not be encrypted. The caller should + * make sure the page is decrypted first, if necessary. */ void @@ -1717,7 +2259,18 @@ vm_page_copy( VM_PAGE_CHECK(src_m); VM_PAGE_CHECK(dest_m); - pmap_copy_page(src_m->phys_addr, dest_m->phys_addr); + /* + * ENCRYPTED SWAP: + * The source page should not be encrypted at this point. + * The destination page will therefore not contain encrypted + * data after the copy. + */ + if (src_m->encrypted) { + panic("vm_page_copy: source page %p is encrypted\n", src_m); + } + dest_m->encrypted = FALSE; + + pmap_copy_page(src_m->phys_page, dest_m->phys_page); } /* @@ -1733,115 +2286,15 @@ vm_page_copy( * memory */ -#define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n)) - -#if MACH_ASSERT -int vm_page_verify_contiguous( - vm_page_t pages, - unsigned int npages); -#endif /* MACH_ASSERT */ - -cpm_counter(unsigned int vpfls_pages_handled = 0;) -cpm_counter(unsigned int vpfls_head_insertions = 0;) -cpm_counter(unsigned int vpfls_tail_insertions = 0;) -cpm_counter(unsigned int vpfls_general_insertions = 0;) -cpm_counter(unsigned int vpfc_failed = 0;) -cpm_counter(unsigned int vpfc_satisfied = 0;) - -/* - * Sort free list by ascending physical address, - * using a not-particularly-bright sort algorithm. - * Caller holds vm_page_queue_free_lock. - */ -static void -vm_page_free_list_sort(void) -{ - vm_page_t sort_list; - vm_page_t sort_list_end; - vm_page_t m, m1, *prev, next_m; - vm_offset_t addr; -#if MACH_ASSERT - unsigned int npages; - int old_free_count; -#endif /* MACH_ASSERT */ - -#if MACH_ASSERT - /* - * Verify pages in the free list.. - */ - npages = 0; - for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) - ++npages; - if (npages != vm_page_free_count) - panic("vm_sort_free_list: prelim: npages %d free_count %d", - npages, vm_page_free_count); - old_free_count = vm_page_free_count; -#endif /* MACH_ASSERT */ - - sort_list = sort_list_end = vm_page_queue_free; - m = NEXT_PAGE(vm_page_queue_free); - SET_NEXT_PAGE(vm_page_queue_free, VM_PAGE_NULL); - cpm_counter(vpfls_pages_handled = 0); - while (m != VM_PAGE_NULL) { - cpm_counter(++vpfls_pages_handled); - next_m = NEXT_PAGE(m); - if (m->phys_addr < sort_list->phys_addr) { - cpm_counter(++vpfls_head_insertions); - SET_NEXT_PAGE(m, sort_list); - sort_list = m; - } else if (m->phys_addr > sort_list_end->phys_addr) { - cpm_counter(++vpfls_tail_insertions); - SET_NEXT_PAGE(sort_list_end, m); - SET_NEXT_PAGE(m, VM_PAGE_NULL); - sort_list_end = m; - } else { - cpm_counter(++vpfls_general_insertions); - /* general sorted list insertion */ - prev = &sort_list; - for (m1=sort_list; m1!=VM_PAGE_NULL; m1=NEXT_PAGE(m1)) { - if (m1->phys_addr > m->phys_addr) { - if (*prev != m1) - panic("vm_sort_free_list: ugh"); - SET_NEXT_PAGE(m, *prev); - *prev = m; - break; - } - prev = (vm_page_t *) &m1->pageq.next; - } - } - m = next_m; - } - -#if MACH_ASSERT - /* - * Verify that pages are sorted into ascending order. 
- */ - for (m = sort_list, npages = 0; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) { - if (m != sort_list && - m->phys_addr <= addr) { - printf("m 0x%x addr 0x%x\n", m, addr); - panic("vm_sort_free_list"); - } - addr = m->phys_addr; - ++npages; - } - if (old_free_count != vm_page_free_count) - panic("vm_sort_free_list: old_free %d free_count %d", - old_free_count, vm_page_free_count); - if (npages != vm_page_free_count) - panic("vm_sort_free_list: npages %d free_count %d", - npages, vm_page_free_count); -#endif /* MACH_ASSERT */ - - vm_page_queue_free = sort_list; -} - - #if MACH_ASSERT /* * Check that the list of pages is ordered by * ascending physical address and has no holes. */ +int vm_page_verify_contiguous( + vm_page_t pages, + unsigned int npages); + int vm_page_verify_contiguous( vm_page_t pages, @@ -1851,16 +2304,16 @@ vm_page_verify_contiguous( unsigned int page_count; vm_offset_t prev_addr; - prev_addr = pages->phys_addr; + prev_addr = pages->phys_page; page_count = 1; for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) { - if (m->phys_addr != prev_addr + page_size) { + if (m->phys_page != prev_addr + 1) { printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n", - m, prev_addr, m->phys_addr); + m, prev_addr, m->phys_page); printf("pages 0x%x page_count %d\n", pages, page_count); panic("vm_page_verify_contiguous: not contiguous!"); } - prev_addr = m->phys_addr; + prev_addr = m->phys_page; ++page_count; } if (page_count != npages) { @@ -1873,6 +2326,13 @@ vm_page_verify_contiguous( #endif /* MACH_ASSERT */ +cpm_counter(unsigned int vpfls_pages_handled = 0;) +cpm_counter(unsigned int vpfls_head_insertions = 0;) +cpm_counter(unsigned int vpfls_tail_insertions = 0;) +cpm_counter(unsigned int vpfls_general_insertions = 0;) +cpm_counter(unsigned int vpfc_failed = 0;) +cpm_counter(unsigned int vpfc_satisfied = 0;) + /* * Find a region large enough to contain at least npages * of contiguous physical memory. @@ -1884,61 +2344,196 @@ vm_page_verify_contiguous( * * Returns a pointer to a list of gobbled pages or VM_PAGE_NULL. * + * Algorithm: + * Loop over the free list, extracting one page at a time and + * inserting those into a sorted sub-list. We stop as soon as + * there's a contiguous range within the sorted list that can + * satisfy the contiguous memory request. This contiguous sub- + * list is chopped out of the sorted sub-list and the remainder + * of the sorted sub-list is put back onto the beginning of the + * free list. */ static vm_page_t vm_page_find_contiguous( - int npages) + unsigned int contig_pages) { - vm_page_t m, *contig_prev, *prev_ptr; - vm_offset_t prev_addr; - unsigned int contig_npages; - vm_page_t list; + vm_page_t sort_list; + vm_page_t *contfirstprev, contlast; + vm_page_t m, m1; + ppnum_t prevcontaddr; + ppnum_t nextcontaddr; + unsigned int npages; + + m = NULL; +#if DEBUG + _mutex_assert(&vm_page_queue_free_lock, MA_OWNED); +#endif +#if MACH_ASSERT + /* + * Verify pages in the free list.. + */ + npages = 0; + for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) + ++npages; + if (npages != vm_page_free_count) + panic("vm_sort_free_list: prelim: npages %u free_count %d", + npages, vm_page_free_count); +#endif /* MACH_ASSERT */ - if (npages < 1) + if (contig_pages == 0 || vm_page_queue_free == VM_PAGE_NULL) return VM_PAGE_NULL; - prev_addr = vm_page_queue_free->phys_addr - (page_size + 1); - prev_ptr = &vm_page_queue_free; - for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) { +#define PPNUM_PREV(x) (((x) > 0) ? 
((x) - 1) : 0) +#define PPNUM_NEXT(x) (((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX) +#define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n)) + + npages = 1; + contfirstprev = &sort_list; + contlast = sort_list = vm_page_queue_free; + vm_page_queue_free = NEXT_PAGE(sort_list); + SET_NEXT_PAGE(sort_list, VM_PAGE_NULL); + prevcontaddr = PPNUM_PREV(sort_list->phys_page); + nextcontaddr = PPNUM_NEXT(sort_list->phys_page); + + while (npages < contig_pages && + (m = vm_page_queue_free) != VM_PAGE_NULL) + { + cpm_counter(++vpfls_pages_handled); - if (m->phys_addr != prev_addr + page_size) { - /* - * Whoops! Pages aren't contiguous. Start over. - */ - contig_npages = 0; - contig_prev = prev_ptr; + /* prepend to existing run? */ + if (m->phys_page == prevcontaddr) + { + vm_page_queue_free = NEXT_PAGE(m); + cpm_counter(++vpfls_head_insertions); + prevcontaddr = PPNUM_PREV(prevcontaddr); + SET_NEXT_PAGE(m, *contfirstprev); + *contfirstprev = m; + npages++; + continue; /* no tail expansion check needed */ + } + + /* append to tail of existing run? */ + else if (m->phys_page == nextcontaddr) + { + vm_page_queue_free = NEXT_PAGE(m); + cpm_counter(++vpfls_tail_insertions); + nextcontaddr = PPNUM_NEXT(nextcontaddr); + SET_NEXT_PAGE(m, NEXT_PAGE(contlast)); + SET_NEXT_PAGE(contlast, m); + contlast = m; + npages++; + } + + /* prepend to the very front of sorted list? */ + else if (m->phys_page < sort_list->phys_page) + { + vm_page_queue_free = NEXT_PAGE(m); + cpm_counter(++vpfls_general_insertions); + prevcontaddr = PPNUM_PREV(m->phys_page); + nextcontaddr = PPNUM_NEXT(m->phys_page); + SET_NEXT_PAGE(m, sort_list); + contfirstprev = &sort_list; + contlast = sort_list = m; + npages = 1; } - if (++contig_npages == npages) { + else /* get to proper place for insertion */ + { + if (m->phys_page < nextcontaddr) + { + prevcontaddr = PPNUM_PREV(sort_list->phys_page); + nextcontaddr = PPNUM_NEXT(sort_list->phys_page); + contfirstprev = &sort_list; + contlast = sort_list; + npages = 1; + } + for (m1 = NEXT_PAGE(contlast); + npages < contig_pages && + m1 != VM_PAGE_NULL && m1->phys_page < m->phys_page; + m1 = NEXT_PAGE(m1)) + { + if (m1->phys_page != nextcontaddr) { + prevcontaddr = PPNUM_PREV(m1->phys_page); + contfirstprev = NEXT_PAGE_PTR(contlast); + npages = 1; + } else { + npages++; + } + nextcontaddr = PPNUM_NEXT(m1->phys_page); + contlast = m1; + } + /* - * Chop these pages out of the free list. - * Mark them all as gobbled. + * We may actually already have enough. + * This could happen if a previous prepend + * joined up two runs to meet our needs. + * If so, bail before we take the current + * page off the free queue. 
*/ - list = *contig_prev; - *contig_prev = NEXT_PAGE(m); - SET_NEXT_PAGE(m, VM_PAGE_NULL); - for (m = list; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) { - assert(m->free); - assert(!m->wanted); - m->free = FALSE; - m->gobbled = TRUE; + if (npages == contig_pages) + break; + + if (m->phys_page != nextcontaddr) + { + contfirstprev = NEXT_PAGE_PTR(contlast); + prevcontaddr = PPNUM_PREV(m->phys_page); + nextcontaddr = PPNUM_NEXT(m->phys_page); + npages = 1; + } else { + nextcontaddr = PPNUM_NEXT(nextcontaddr); + npages++; } - vm_page_free_count -= npages; - if (vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; - vm_page_wire_count += npages; - vm_page_gobble_count += npages; - cpm_counter(++vpfc_satisfied); - assert(vm_page_verify_contiguous(list, contig_npages)); - return list; + vm_page_queue_free = NEXT_PAGE(m); + cpm_counter(++vpfls_general_insertions); + SET_NEXT_PAGE(m, NEXT_PAGE(contlast)); + SET_NEXT_PAGE(contlast, m); + contlast = m; + } + + /* See how many pages are now contiguous after the insertion */ + for (m1 = NEXT_PAGE(m); + npages < contig_pages && + m1 != VM_PAGE_NULL && m1->phys_page == nextcontaddr; + m1 = NEXT_PAGE(m1)) + { + nextcontaddr = PPNUM_NEXT(nextcontaddr); + contlast = m1; + npages++; } + } - assert(contig_npages < npages); - prev_ptr = (vm_page_t *) &m->pageq.next; - prev_addr = m->phys_addr; + /* how did we do? */ + if (npages == contig_pages) + { + cpm_counter(++vpfc_satisfied); + + /* remove the contiguous range from the sorted list */ + m = *contfirstprev; + *contfirstprev = NEXT_PAGE(contlast); + SET_NEXT_PAGE(contlast, VM_PAGE_NULL); + assert(vm_page_verify_contiguous(m, npages)); + + /* inline vm_page_gobble() for each returned page */ + for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) { + assert(m1->free); + assert(!m1->wanted); + assert(!m1->laundry); + m1->free = FALSE; + m1->no_isync = TRUE; + m1->gobbled = TRUE; + } + vm_page_wire_count += npages; + vm_page_gobble_count += npages; + vm_page_free_count -= npages; + + /* stick free list at the tail of the sorted list */ + while ((m1 = *contfirstprev) != VM_PAGE_NULL) + contfirstprev = (vm_page_t *)&m1->pageq.next; + *contfirstprev = vm_page_queue_free; } - cpm_counter(++vpfc_failed); - return VM_PAGE_NULL; + + vm_page_queue_free = sort_list; + return m; } /* @@ -1951,10 +2546,10 @@ cpm_allocate( boolean_t wire) { register vm_page_t m; - vm_page_t *first_contig; - vm_page_t free_list, pages; - unsigned int npages, n1pages; - int vm_pages_available; + vm_page_t pages; + unsigned int npages; + unsigned int vm_pages_available; + boolean_t wakeup; if (size % page_size != 0) return KERN_INVALID_ARGUMENT; @@ -1966,31 +2561,37 @@ cpm_allocate( * Should also take active and inactive pages * into account... One day... */ + npages = size / page_size; vm_pages_available = vm_page_free_count - vm_page_free_reserved; - if (size > vm_pages_available * page_size) { + if (npages > vm_pages_available) { mutex_unlock(&vm_page_queue_free_lock); + vm_page_unlock_queues(); return KERN_RESOURCE_SHORTAGE; } - vm_page_free_list_sort(); - - npages = size / page_size; - /* * Obtain a pointer to a subset of the free * list large enough to satisfy the request; * the region will be physically contiguous. 
*/ pages = vm_page_find_contiguous(npages); + + /* adjust global freelist counts and determine need for wakeups */ + if (vm_page_free_count < vm_page_free_count_minimum) + vm_page_free_count_minimum = vm_page_free_count; + + wakeup = ((vm_page_free_count < vm_page_free_min) || + ((vm_page_free_count < vm_page_free_target) && + (vm_page_inactive_count < vm_page_inactive_target))); + + mutex_unlock(&vm_page_queue_free_lock); + if (pages == VM_PAGE_NULL) { - mutex_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); return KERN_NO_SPACE; } - mutex_unlock(&vm_page_queue_free_lock); - /* * Walk the returned list, wiring the pages. */ @@ -2011,6 +2612,9 @@ cpm_allocate( } vm_page_unlock_queues(); + if (wakeup) + thread_wakeup((event_t) &vm_page_free_wanted); + /* * The CPM pages should now be available and * ordered by ascending physical address. @@ -2044,7 +2648,7 @@ vm_page_info( hash_info_bucket_t *info, unsigned int count) { - int i; + unsigned int i; if (vm_page_bucket_count < count) count = vm_page_bucket_count; @@ -2079,9 +2683,11 @@ vm_page_info( */ void vm_page_print( - vm_page_t p) + db_addr_t db_addr) { - extern db_indent; + vm_page_t p; + + p = (vm_page_t) (long) db_addr; iprintf("page 0x%x\n", p); @@ -2091,14 +2697,14 @@ vm_page_print( printf(", offset=0x%x", p->offset); printf(", wire_count=%d", p->wire_count); - iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sdiscard\n", + iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n", (p->inactive ? "" : "!"), (p->active ? "" : "!"), (p->gobbled ? "" : "!"), (p->laundry ? "" : "!"), (p->free ? "" : "!"), (p->reference ? "" : "!"), - (p->discard_request ? "" : "!")); + (p->encrypted ? "" : "!")); iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n", (p->busy ? "" : "!"), (p->wanted ? "" : "!"), @@ -2119,7 +2725,7 @@ vm_page_print( (p->restart ? "" : "!"), (p->unusual ? "" : "!")); - iprintf("phys_addr=0x%x", p->phys_addr); + iprintf("phys_page=0x%x", p->phys_page); printf(", page_error=0x%x", p->page_error); printf(", page_lock=0x%x", p->page_lock); printf(", unlock_request=%d\n", p->unlock_request);
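
The bucket hashing set up in vm_page_bootstrap above sizes the hash table to a power of two no greater than the number of physical pages, derives vm_page_hash_mask from the bucket count, and builds the vm_page_bucket_hash seed from bits near the square root and fourth root of the table size, with the low bit forced on so the multiplier stays odd. The user-space sketch below only illustrates that seeding scheme; the page_hash() mixing step and the names page_hash_setup/page_hash are assumptions added for illustration and are not the kernel's actual vm_page_hash() macro.

#include <stdint.h>

static uint32_t page_hash_mask;    /* bucket_count - 1; bucket count is a power of two */
static uint32_t page_bucket_hash;  /* seed built from log2(bucket count), as in the diff */

/* Mirror the seed construction shown in vm_page_bootstrap above. */
static void
page_hash_setup(uint32_t bucket_count)
{
        uint32_t size = bucket_count;
        uint32_t log1 = 0;

        page_hash_mask = bucket_count - 1;

        while (size > 1) {              /* log1 = log2(bucket_count) */
                size /= 2;
                log1++;
        }
        page_bucket_hash  = 1 << ((log1 + 1) >> 1);  /* ~ sqrt of table size */
        page_bucket_hash |= 1 << ((log1 + 1) >> 2);  /* ~ fourth root of table size */
        page_bucket_hash |= 1;                       /* keep the multiplier odd */
}

/*
 * Hypothetical mixer (assumption, for illustration only): multiply the
 * object key by the seed, xor the page index with the seed, and mask the
 * sum down to a bucket index; a page is then found by walking that
 * bucket's singly linked "next" chain.
 */
static uint32_t
page_hash(uintptr_t object_key, uint64_t page_index)
{
        return (((uint32_t)object_key * page_bucket_hash) +
            ((uint32_t)page_index ^ page_bucket_hash)) & page_hash_mask;
}

For example, a bucket count of 1024 gives log1 = 10, a mask of 0x3ff, and a seed of 0x25 under this construction.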