/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
- *
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
 * NON-INFRINGEMENT. Please see the License for the specific language
 * governing rights and limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
* The proverbial page-out daemon.
*/
+#include <stdint.h>
+
+#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <mach_kdb.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
+#include <mach/upl.h>
+#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
-#include <kern/host_statistics.h>
+
+#include <kern/kern_types.h>
#include <kern/counters.h>
+#include <kern/host_statistics.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/xpr.h>
+#include <kern/kalloc.h>
+
+#include <machine/vm_tuning.h>
+
#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
-#include <machine/vm_tuning.h>
-#include <kern/misc_protos.h>
+#include <vm/vm_protos.h> /* must be last */
+
+/*
+ * ENCRYPTED SWAP:
+ */
+#include <../bsd/crypto/aes/aes.h>
extern ipc_port_t memory_manager_default;
-#ifndef VM_PAGE_LAUNDRY_MAX
-#define VM_PAGE_LAUNDRY_MAX 6 /* outstanding DMM page cleans */
-#endif /* VM_PAGEOUT_LAUNDRY_MAX */
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 32 /* simultaneous EMM page cleans */
-#endif /* VM_PAGEOUT_BURST_MAX */
+#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
+#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000 /* maximum iterations of the active queue to move pages to inactive */
+#endif
+
+#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
+#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
+#endif
+
+#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
+#define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
+#endif
+
+#ifndef VM_PAGEOUT_INACTIVE_RELIEF
+#define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
+#endif
-#ifndef VM_PAGEOUT_DISCARD_MAX
-#define VM_PAGEOUT_DISCARD_MAX 68 /* simultaneous EMM page cleans */
-#endif /* VM_PAGEOUT_DISCARD_MAX */
+#ifndef VM_PAGE_LAUNDRY_MAX
+#define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */
#endif /* VM_PAGE_LAUNDRY_MAX */
#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
#endif /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
#endif /* VM_PAGEOUT_EMPTY_WAIT */
+#ifndef VM_PAGEOUT_DEADLOCK_WAIT
+#define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
+#endif /* VM_PAGEOUT_DEADLOCK_WAIT */
+
+#ifndef VM_PAGEOUT_IDLE_WAIT
+#define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
+#endif /* VM_PAGEOUT_IDLE_WAIT */
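+
+/*
+ * Each compile-time default above pairs with a vm_pageout_* global
+ * declared below; the pageout thread's startup code (outside this
+ * hunk) is expected to seed any global still at zero from its
+ * default, following the same pattern the removed initialization in
+ * vm_pageout() used, e.g.:
+ *
+ *	if (vm_pageout_idle_wait == 0)
+ *		vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
+ */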
+
+
/*
* To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.
*/
#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED \
- ((16 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
+#define VM_PAGE_FREE_RESERVED(n) \
+ ((6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif /* VM_PAGE_FREE_RESERVED */
+
+/*
+ * must hold the page queues lock to
+ * manipulate this structure
+ */
+struct vm_pageout_queue {
+ queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */
+ unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */
+ unsigned int pgo_maxlaundry;
+
+ unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */
+ pgo_busy:1, /* iothread is currently processing request from pgo_pending */
+ pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
+ :0;
+};
+
+#define VM_PAGE_Q_THROTTLED(q) \
+ ((q)->pgo_laundry >= (q)->pgo_maxlaundry)
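+
+/*
+ * VM_PAGE_Q_THROTTLED is how vm_pageout_scan decides whether a dirty
+ * page may be handed to a given queue right now. The typical check,
+ * made against both the internal (default pager) and external queues:
+ *
+ *	if (object->internal) {
+ *		if (VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default))
+ *			inactive_throttled = TRUE;
+ *	} else if (VM_PAGE_Q_THROTTLED(eq))
+ *		inactive_throttled = TRUE;
+ *
+ * pgo_laundry rises in vm_pageout_cluster and falls in
+ * vm_pageout_throttle_up as pageouts complete.
+ */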
+
+
/*
* Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes. In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */
/*
* Forward declarations for internal routines.
*/
+
+static void vm_pageout_garbage_collect(int);
+static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
+static void vm_pageout_iothread_external(void);
+static void vm_pageout_iothread_internal(void);
+static void vm_pageout_queue_steal(vm_page_t);
+
extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
-extern void vm_pageout_throttle(vm_page_t m);
-extern vm_page_t vm_pageout_cluster_page(
- vm_object_t object,
- vm_object_offset_t offset,
- boolean_t precious_clean);
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;
-unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
-unsigned int vm_page_laundry_min = 0;
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
+unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-unsigned int vm_free_page_pause = 100; /* milliseconds */
+unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
+unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
+unsigned int vm_pageout_deadlock_relief = 0;
+unsigned int vm_pageout_inactive_relief = 0;
+unsigned int vm_pageout_burst_active_throttle = 0;
+unsigned int vm_pageout_burst_inactive_throttle = 0;
/*
 * Protection against zero fill flushing live working sets derived
 * from existing backing store and files
 */
unsigned int vm_pageout_inactive_clean = 0; /* debugging */
unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
+unsigned int vm_pageout_purged_objects = 0; /* debugging */
unsigned int vm_stat_discard = 0; /* debugging */
unsigned int vm_stat_discard_sent = 0; /* debugging */
unsigned int vm_stat_discard_failure = 0; /* debugging */
unsigned int vm_stat_discard_throttle = 0; /* debugging */
-unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
-unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
-unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
-unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
-unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
-unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
+unsigned int vm_pageout_scan_active_throttled = 0;
+unsigned int vm_pageout_scan_inactive_throttled = 0;
+unsigned int vm_pageout_scan_throttle = 0; /* debugging */
+unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
+unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
+unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
+unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
+unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
+/*
+ * Backing store throttle when BS is exhausted
+ */
+unsigned int vm_backing_store_low = 0;
unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;
+
+/*
+ * ENCRYPTED SWAP:
+ * counters and statistics...
+ */
+unsigned long vm_page_decrypt_counter = 0;
+unsigned long vm_page_decrypt_for_upl_counter = 0;
+unsigned long vm_page_encrypt_counter = 0;
+unsigned long vm_page_encrypt_abort_counter = 0;
+unsigned long vm_page_encrypt_already_encrypted_counter = 0;
+boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
+
+
+struct vm_pageout_queue vm_pageout_queue_internal;
+struct vm_pageout_queue vm_pageout_queue_external;
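+
+/*
+ * Two pageout queues: vm_pageout_queue_internal feeds the default
+ * pager (internal objects), vm_pageout_queue_external feeds external
+ * memory managers. vm_pageout_cluster routes a page to one or the
+ * other based on object->internal, and each queue is drained by its
+ * own I/O thread (vm_pageout_iothread_internal / _external).
+ */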
+
+
+/*
+ * Routine: vm_backing_store_disable
+ * Purpose:
+ * Suspend non-privileged threads wishing to extend
+ * backing store when we are low on backing store
+ * (Synchronized by caller)
+ */
+void
+vm_backing_store_disable(
+ boolean_t disable)
+{
+ if(disable) {
+ vm_backing_store_low = 1;
+ } else {
+ if(vm_backing_store_low) {
+ vm_backing_store_low = 0;
+ thread_wakeup((event_t) &vm_backing_store_low);
+ }
+ }
+}
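+
+/*
+ * The wakeup above pairs with threads blocked on &vm_backing_store_low
+ * while waiting to extend backing store (the waiting side is outside
+ * this excerpt); clearing the flag lets them proceed.
+ */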
+
+
/*
* Routine: vm_pageout_object_allocate
* Purpose:
assert(object->pager_ready);
- if (object->pager_trusted || object->internal)
- vm_pageout_throttle(m);
-
new_object = vm_object_allocate(size);
if (object->pager_trusted) {
*/
vm_object_lock(object);
vm_object_paging_begin(object);
+ vm_page_lock_queues();
+ vm_page_unlock_queues();
vm_object_unlock(object);
vm_pageout_in_place++;
vm_object_t object)
{
vm_object_t shadow_object;
+ boolean_t shadow_internal;
/*
* Deal with the deallocation (last reference) of a pageout object
assert(object->pageout);
shadow_object = object->shadow;
vm_object_lock(shadow_object);
+ shadow_internal = shadow_object->internal;
while (!queue_empty(&object->memq)) {
vm_page_t p, m;
/*
* Handle the trusted pager throttle.
+ * Also decrement the burst throttle (if external).
*/
vm_page_lock_queues();
if (m->laundry) {
- vm_page_laundry_count--;
- m->laundry = FALSE;
- if (vm_page_laundry_count < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t) &vm_page_laundry_count);
- }
+ vm_pageout_throttle_up(m);
}
/*
/*
* Revoke all access to the page. Since the object is
* locked, and the page is busy, this prevents the page
- * from being dirtied after the pmap_is_modified() call
+ * from being dirtied after the pmap_disconnect() call
* returns.
- */
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
-
- /*
+ *
 * Since the page is left "dirty" but "not modified", we
* can detect whether the page was redirtied during
* pageout by checking the modify state.
*/
- m->dirty = pmap_is_modified(m->phys_page);
+ if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
+ m->dirty = TRUE;
+ else
+ m->dirty = FALSE;
if (m->dirty) {
CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
/* We do not re-set m->dirty ! */
/* The page was busy so no extraneous activity */
- /* could have occured. COPY_INTO is a read into the */
+ /* could have occurred. COPY_INTO is a read into the */
/* new pages. CLEAN_IN_PLACE does actually write */
/* out the pages but handling outside of this code */
/* will take care of resetting dirty. We clear the */
vm_object_offset_t offset;
register vm_page_t holding_page;
register vm_page_t new_m;
- register vm_page_t new_page;
boolean_t need_to_wire = FALSE;
vm_pageout_initialize_page(
vm_page_t m)
{
- vm_map_copy_t copy;
- vm_object_t new_object;
vm_object_t object;
vm_object_offset_t paging_offset;
vm_page_t holding_page;
object = m->object;
paging_offset = m->offset + object->paging_offset;
vm_object_paging_begin(object);
- vm_object_unlock(object);
if (m->absent || m->error || m->restart ||
(!m->dirty && !m->precious)) {
VM_PAGE_FREE(m);
panic("reservation without pageout?"); /* alan */
+ vm_object_unlock(object);
return;
}
/* set the page for future call to vm_fault_list_request */
holding_page = NULL;
- vm_object_lock(m->object);
vm_page_lock_queues();
pmap_clear_modify(m->phys_page);
m->dirty = TRUE;
- m->busy = TRUE;
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
+ m->busy = TRUE;
+ m->list_req_pending = TRUE;
+ m->cleaning = TRUE;
m->pageout = TRUE;
vm_page_wire(m);
vm_page_unlock_queues();
- vm_object_unlock(m->object);
- vm_pageout_throttle(m);
+ vm_object_unlock(object);
/*
* Write the data to its pager.
/*
* vm_pageout_cluster:
*
- * Given a page, page it out, and attempt to clean adjacent pages
+ * Given a page, queue it to the appropriate I/O thread,
+ * which will page it out and attempt to clean adjacent pages
* in the same operation.
*
- * The page must be busy, and the object unlocked w/ paging reference
- * to prevent deallocation or collapse. The page must not be on any
- * pageout queue.
+ * The page must be busy, and the object and queues locked. We will take a
+ * paging reference to prevent deallocation or collapse when we
+ * release the object lock back at the call site. The I/O thread
+ * is responsible for consuming this reference
+ *
+ * The page must not be on any pageout queue.
*/
+
void
-vm_pageout_cluster(
- vm_page_t m)
+vm_pageout_cluster(vm_page_t m)
{
vm_object_t object = m->object;
- vm_object_offset_t offset = m->offset; /* from vm_object start */
- vm_object_offset_t paging_offset = m->offset + object->paging_offset;
- vm_object_t new_object;
- vm_object_offset_t new_offset;
- vm_size_t cluster_size;
- vm_object_offset_t cluster_offset; /* from memory_object start */
- vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
- vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
- vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
- vm_object_offset_t offset_within_cluster;
- vm_size_t length_of_data;
- vm_page_t friend, holding_page;
- kern_return_t rc;
- boolean_t precious_clean = TRUE;
- int pages_in_cluster;
-
- CLUSTER_STAT(int pages_at_higher_offsets = 0;)
- CLUSTER_STAT(int pages_at_lower_offsets = 0;)
+ struct vm_pageout_queue *q;
+
XPR(XPR_VM_PAGEOUT,
"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
- (integer_t)object, offset, (integer_t)m, 0, 0);
+ (integer_t)object, m->offset, (integer_t)m, 0, 0);
- CLUSTER_STAT(vm_pageout_cluster_clusters++;)
/*
* Only a certain kind of page is appreciated here.
*/
assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
- vm_object_lock(object);
- cluster_size = object->cluster_size;
-
- assert(cluster_size >= PAGE_SIZE);
- if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
- assert(object->pager_created && object->pager_initialized);
- assert(object->internal || object->pager_ready);
-
- if (m->precious && !m->dirty)
- precious_clean = TRUE;
-
- if (!object->pager_trusted || !allow_clustered_pageouts)
- cluster_size = PAGE_SIZE;
- vm_object_unlock(object);
-
- cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
- /* bytes from beginning of cluster */
- /*
- * Due to unaligned mappings, we have to be careful
- * of negative offsets into the VM object. Clip the cluster
- * boundary to the VM object, not the memory object.
- */
- if (offset > cluster_offset) {
- cluster_lower_bound = offset - cluster_offset;
- /* from vm_object */
- } else {
- cluster_lower_bound = 0;
- }
- cluster_upper_bound = (offset - cluster_offset) +
- (vm_object_offset_t)cluster_size;
-
- /* set the page for future call to vm_fault_list_request */
- holding_page = NULL;
- vm_object_lock(m->object);
- vm_page_lock_queues();
- m->busy = TRUE;
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
- m->pageout = TRUE;
- vm_page_wire(m);
- vm_page_unlock_queues();
- vm_object_unlock(m->object);
- vm_pageout_throttle(m);
-
- /*
- * Search backward for adjacent eligible pages to clean in
- * this operation.
- */
-
- cluster_start = offset;
- if (offset) { /* avoid wrap-around at zero */
- for (cluster_start = offset - PAGE_SIZE_64;
- cluster_start >= cluster_lower_bound;
- cluster_start -= PAGE_SIZE_64) {
- assert(cluster_size > PAGE_SIZE);
-
- vm_object_lock(object);
- vm_page_lock_queues();
-
- if ((friend = vm_pageout_cluster_page(object, cluster_start,
- precious_clean)) == VM_PAGE_NULL) {
- vm_page_unlock_queues();
- vm_object_unlock(object);
- break;
- }
- new_offset = (cluster_start + object->paging_offset)
- & (cluster_size - 1);
-
- assert(new_offset < cluster_offset);
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
-/* do nothing except advance the write request, all we really need to */
-/* do is push the target page and let the code at the other end decide */
-/* what is really the right size */
- if (vm_page_free_count <= vm_page_free_reserved) {
- m->busy = TRUE;
- m->pageout = TRUE;
- vm_page_wire(m);
- }
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
- if(m->dirty || m->object->internal) {
- CLUSTER_STAT(pages_at_lower_offsets++;)
- }
-
- }
- cluster_start += PAGE_SIZE_64;
- }
- assert(cluster_start >= cluster_lower_bound);
- assert(cluster_start <= offset);
- /*
- * Search forward for adjacent eligible pages to clean in
- * this operation.
- */
- for (cluster_end = offset + PAGE_SIZE_64;
- cluster_end < cluster_upper_bound;
- cluster_end += PAGE_SIZE_64) {
- assert(cluster_size > PAGE_SIZE);
-
- vm_object_lock(object);
- vm_page_lock_queues();
-
- if ((friend = vm_pageout_cluster_page(object, cluster_end,
- precious_clean)) == VM_PAGE_NULL) {
- vm_page_unlock_queues();
- vm_object_unlock(object);
- break;
- }
- new_offset = (cluster_end + object->paging_offset)
- & (cluster_size - 1);
-
- assert(new_offset < cluster_size);
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
-/* do nothing except advance the write request, all we really need to */
-/* do is push the target page and let the code at the other end decide */
-/* what is really the right size */
- if (vm_page_free_count <= vm_page_free_reserved) {
- m->busy = TRUE;
- m->pageout = TRUE;
- vm_page_wire(m);
- }
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
-
- if(m->dirty || m->object->internal) {
- CLUSTER_STAT(pages_at_higher_offsets++;)
- }
- }
- assert(cluster_end <= cluster_upper_bound);
- assert(cluster_end >= offset + PAGE_SIZE);
-
/*
- * (offset - cluster_offset) is beginning of cluster_object
- * relative to vm_object start.
+ * protect the object from collapse -
+ * locking in the object's paging_offset.
*/
- offset_within_cluster = cluster_start - (offset - cluster_offset);
- length_of_data = cluster_end - cluster_start;
-
- assert(offset_within_cluster < cluster_size);
- assert((offset_within_cluster + length_of_data) <= cluster_size);
-
- rc = KERN_SUCCESS;
- assert(rc == KERN_SUCCESS);
-
- pages_in_cluster = length_of_data/PAGE_SIZE;
-
-#if MACH_CLUSTER_STATS
- (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
- (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
- (cluster_stats[pages_in_cluster].pages_in_cluster)++;
-#endif /* MACH_CLUSTER_STATS */
+ vm_object_paging_begin(object);
/*
- * Send the data to the pager.
+ * set the page for future call to vm_fault_list_request
+ * page should already be marked busy
*/
- paging_offset = cluster_start + object->paging_offset;
-
- rc = memory_object_data_return(object->pager,
- paging_offset,
- length_of_data,
- !precious_clean,
- FALSE);
+ vm_page_wire(m);
+ m->list_req_pending = TRUE;
+ m->cleaning = TRUE;
+ m->pageout = TRUE;
+ m->laundry = TRUE;
- vm_object_lock(object);
- vm_object_paging_end(object);
+ if (object->internal == TRUE)
+ q = &vm_pageout_queue_internal;
+ else
+ q = &vm_pageout_queue_external;
+ q->pgo_laundry++;
- if (holding_page) {
- assert(!object->pager_trusted);
- VM_PAGE_FREE(holding_page);
- vm_object_paging_end(object);
+ m->pageout_queue = TRUE;
+ queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
+
+ if (q->pgo_idle == TRUE) {
+ q->pgo_idle = FALSE;
+ thread_wakeup((event_t) &q->pgo_pending);
}
-
- vm_object_unlock(object);
}
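+
+/*
+ * A sketch of the calling convention described above (the live call
+ * site is vm_pageout_scan below; "object" and "m" are just the
+ * sketch's names):
+ *
+ *	vm_page_lock_queues();
+ *	vm_object_lock(object);
+ *	...
+ *	m->busy = TRUE;
+ *	vm_pageout_cluster(m);		(takes a paging reference, queues m)
+ *	...
+ *	vm_object_unlock(object);	(I/O thread consumes that reference)
+ *	vm_page_unlock_queues();
+ */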
-/*
- * Trusted pager throttle.
- * Object must be unlocked, page queues must be unlocked.
- */
-void
-vm_pageout_throttle(
- register vm_page_t m)
-{
- vm_page_lock_queues();
- assert(!m->laundry);
- m->laundry = TRUE;
- while (vm_page_laundry_count >= vm_page_laundry_max) {
- /*
- * Set the threshold for when vm_page_free()
- * should wake us up.
- */
- vm_page_laundry_min = vm_page_laundry_max/2;
-
- assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
- vm_page_unlock_queues();
- /*
- * Pause to let the default pager catch up.
- */
- thread_block((void (*)(void)) 0);
- vm_page_lock_queues();
- }
- vm_page_laundry_count++;
- vm_page_unlock_queues();
-}
+unsigned long vm_pageout_throttle_up_count = 0;
/*
- * The global variable vm_pageout_clean_active_pages controls whether
- * active pages are considered valid to be cleaned in place during a
- * clustered pageout. Performance measurements are necessary to determine
- * the best policy.
- */
-int vm_pageout_clean_active_pages = 1;
-/*
- * vm_pageout_cluster_page: [Internal]
- *
- * return a vm_page_t to the page at (object,offset) if it is appropriate
- * to clean in place. Pages that are non-existent, busy, absent, already
- * cleaning, or not dirty are not eligible to be cleaned as an adjacent
- * page in a cluster.
+ * A page is back from laundry. See if there are some pages waiting to
+ * go to laundry and if we can let some of them go now.
*
- * The object must be locked on entry, and remains locked throughout
- * this call.
+ * Object and page queues must be locked.
*/
-
-vm_page_t
-vm_pageout_cluster_page(
- vm_object_t object,
- vm_object_offset_t offset,
- boolean_t precious_clean)
+void
+vm_pageout_throttle_up(
+ vm_page_t m)
{
- vm_page_t m;
-
- XPR(XPR_VM_PAGEOUT,
- "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
- (integer_t)object, offset, 0, 0, 0);
-
- if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
- return(VM_PAGE_NULL);
+ struct vm_pageout_queue *q;
- if (m->busy || m->absent || m->cleaning ||
- (m->wire_count != 0) || m->error)
- return(VM_PAGE_NULL);
+ vm_pageout_throttle_up_count++;
- if (vm_pageout_clean_active_pages) {
- if (!m->active && !m->inactive) return(VM_PAGE_NULL);
- } else {
- if (!m->inactive) return(VM_PAGE_NULL);
- }
+ assert(m->laundry);
+ assert(m->object != VM_OBJECT_NULL);
+ assert(m->object != kernel_object);
- assert(!m->private);
- assert(!m->fictitious);
+ if (m->object->internal == TRUE)
+ q = &vm_pageout_queue_internal;
+ else
+ q = &vm_pageout_queue_external;
- if (!m->dirty) m->dirty = pmap_is_modified(m->phys_page);
+ m->laundry = FALSE;
+ q->pgo_laundry--;
- if (precious_clean) {
- if (!m->precious || !m->dirty)
- return(VM_PAGE_NULL);
- } else {
- if (!m->dirty)
- return(VM_PAGE_NULL);
+ if (q->pgo_throttled == TRUE) {
+ q->pgo_throttled = FALSE;
+ thread_wakeup((event_t) &q->pgo_laundry);
}
- return(m);
}
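+
+/*
+ * vm_pageout_throttle_up is the inverse of the accounting in
+ * vm_pageout_cluster: the laundry bit and pgo_laundry count are
+ * dropped, and if vm_pageout_scan parked itself on this queue
+ * (pgo_throttled) it gets its wakeup on &q->pgo_laundry here.
+ */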
+
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
* It returns with vm_page_queue_free_lock held and
* vm_page_free_wanted == 0.
*/
-extern void vm_pageout_scan_continue(void); /* forward; */
+
+#define DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER)
+
+#define FCS_IDLE 0
+#define FCS_DELAYED 1
+#define FCS_DEADLOCK_DETECTED 2
+
+struct flow_control {
+ int state;
+ mach_timespec_t ts;
+};
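+
+/*
+ * flow_control drives vm_pageout_scan's handling of a throttled
+ * default pager queue:
+ *
+ *	FCS_IDLE		queue just became throttled; arm a
+ *				vm_pageout_deadlock_wait deadline
+ *	FCS_DELAYED		still throttled; if the deadline passes,
+ *				assume a potential deadlock
+ *	FCS_DEADLOCK_DETECTED	move vm_pageout_deadlock_target pages
+ *				(clean, or dirty but externally pageable)
+ *				to relieve it, then re-arm the timer
+ *
+ * Whenever the scan gets past the throttle checks without blocking,
+ * the state drops back to FCS_IDLE.
+ */
+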
void
vm_pageout_scan(void)
{
- unsigned int burst_count;
- boolean_t now = FALSE;
- unsigned int laundry_pages;
- boolean_t need_more_inactive_pages;
- unsigned int loop_detect;
+ unsigned int loop_count = 0;
+ unsigned int inactive_burst_count = 0;
+ unsigned int active_burst_count = 0;
+ vm_page_t local_freeq = 0;
+ int local_freed = 0;
+ int delayed_unlock = 0;
+ int need_internal_inactive = 0;
+ int refmod_state = 0;
+ int vm_pageout_deadlock_target = 0;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct flow_control flow_control;
+ boolean_t active_throttled = FALSE;
+ boolean_t inactive_throttled = FALSE;
+ mach_timespec_t ts;
+ unsigned int msecs = 0;
+ vm_object_t object;
+
+
+ flow_control.state = FCS_IDLE;
+ iq = &vm_pageout_queue_internal;
+ eq = &vm_pageout_queue_external;
XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
* When memory is very tight, we can't rely on external pagers to
* clean pages. They probably aren't running, because they
* aren't vm-privileged. If we kept sending dirty pages to them,
- * we could exhaust the free list. However, we can't just ignore
- * pages belonging to external objects, because there might be no
- * pages belonging to internal objects. Hence, we get the page
- * into an internal object and then immediately double-page it,
- * sending it to the default pager.
- *
- * consider_zone_gc should be last, because the other operations
- * might return memory to zones.
+ * we could exhaust the free list.
*/
+ vm_page_lock_queues();
+ delayed_unlock = 1;
- Restart:
-
-#if THREAD_SWAPPER
- mutex_lock(&vm_page_queue_free_lock);
- now = (vm_page_free_count < vm_page_free_min);
- mutex_unlock(&vm_page_queue_free_lock);
-
- swapout_threads(now);
-#endif /* THREAD_SWAPPER */
-
- stack_collect();
- consider_task_collect();
- consider_thread_collect();
- consider_zone_gc();
- consider_machine_collect();
+Restart:
+ /*
+ * Recalculate vm_page_inactivate_target.
+ */
+ vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count);
+ object = NULL;
- loop_detect = vm_page_active_count + vm_page_inactive_count;
-#if 0
- if (vm_page_free_count <= vm_page_free_reserved) {
- need_more_inactive_pages = TRUE;
- } else {
- need_more_inactive_pages = FALSE;
- }
-#else
- need_more_inactive_pages = FALSE;
-#endif
+ for (;;) {
+ vm_page_t m;
- for (burst_count = 0;;) {
- register vm_page_t m;
- register vm_object_t object;
+ if (delayed_unlock == 0)
+ vm_page_lock_queues();
- /*
- * Recalculate vm_page_inactivate_target.
- */
+ active_burst_count = vm_page_active_count;
- vm_page_lock_queues();
- vm_page_inactive_target =
- VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
+ if (active_burst_count > vm_pageout_burst_active_throttle)
+ active_burst_count = vm_pageout_burst_active_throttle;
/*
* Move pages from active to inactive.
*/
-
- while ((vm_page_inactive_count < vm_page_inactive_target ||
- need_more_inactive_pages) &&
- !queue_empty(&vm_page_queue_active)) {
- register vm_object_t object;
+ while ((need_internal_inactive ||
+ vm_page_inactive_count < vm_page_inactive_target) &&
+ !queue_empty(&vm_page_queue_active) &&
+ ((active_burst_count--) > 0)) {
vm_pageout_active++;
+
m = (vm_page_t) queue_first(&vm_page_queue_active);
+ assert(m->active && !m->inactive);
+ assert(!m->laundry);
+ assert(m->object != kernel_object);
+
/*
- * If we're getting really low on memory,
- * try selecting a page that will go
- * directly to the default_pager.
- * If there are no such pages, we have to
- * page out a page backed by an EMM,
- * so that the default_pager can recover
- * it eventually.
+ * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run... otherwise, we're likely to trip over this
+ * object in the same state as we work our way through
+ * the queue... clumps of pages associated with the same
+ * object are fairly typical on the inactive and active queues
*/
- if (need_more_inactive_pages &&
- (IP_VALID(memory_manager_default))) {
- vm_pageout_scan_active_emm_throttle++;
- do {
- assert(m->active && !m->inactive);
- object = m->object;
-
- if (vm_object_lock_try(object)) {
-#if 0
- if (object->pager_trusted ||
- object->internal) {
- /* found one ! */
- vm_pageout_scan_active_emm_throttle_success++;
- goto object_locked_active;
- }
-#else
- vm_pageout_scan_active_emm_throttle_success++;
- goto object_locked_active;
-#endif
- vm_object_unlock(object);
- }
- m = (vm_page_t) queue_next(&m->pageq);
- } while (!queue_end(&vm_page_queue_active,
- (queue_entry_t) m));
- if (queue_end(&vm_page_queue_active,
- (queue_entry_t) m)) {
- vm_pageout_scan_active_emm_throttle_failure++;
- m = (vm_page_t)
- queue_first(&vm_page_queue_active);
+ if (m->object != object) {
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
}
+ if (!vm_object_lock_try(m->object)) {
+ /*
+ * move page to end of active queue and continue
+ */
+ queue_remove(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+
+ goto done_with_activepage;
+ }
+ object = m->object;
}
-
- assert(m->active && !m->inactive);
-
- object = m->object;
- if (!vm_object_lock_try(object)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_active, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
-
- mutex_pause();
- vm_page_lock_queues();
- continue;
- }
-
- object_locked_active:
/*
- * If the page is busy, then we pull it
- * off the active queue and leave it alone.
+ * if the page is BUSY, then we pull it
+ * off the active queue and leave it alone.
+ * when BUSY is cleared, it will get stuck
+ * back on the appropriate queue
*/
-
if (m->busy) {
- vm_object_unlock(object);
queue_remove(&vm_page_queue_active, m,
vm_page_t, pageq);
- m->active = FALSE;
+ m->pageq.next = NULL;
+ m->pageq.prev = NULL;
+
if (!m->fictitious)
vm_page_active_count--;
- continue;
+ m->active = FALSE;
+
+ goto done_with_activepage;
}
+ if (need_internal_inactive) {
+ /*
+ * If we're unable to make forward progress
+ * with the current set of pages on the
+ * inactive queue due to busy objects or
+ * throttled pageout queues, then
+ * move a page that is already clean
+ * or belongs to a pageout queue that
+ * isn't currently throttled
+ */
+ active_throttled = FALSE;
+ if (object->internal) {
+ if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
+ active_throttled = TRUE;
+ } else if (VM_PAGE_Q_THROTTLED(eq)) {
+ active_throttled = TRUE;
+ }
+ if (active_throttled == TRUE) {
+ if (!m->dirty) {
+ refmod_state = pmap_get_refmod(m->phys_page);
+
+ if (refmod_state & VM_MEM_REFERENCED)
+ m->reference = TRUE;
+ if (refmod_state & VM_MEM_MODIFIED)
+ m->dirty = TRUE;
+ }
+ if (m->dirty || m->precious) {
+ /*
+ * page is dirty and targets a THROTTLED queue
+ * so all we can do is move it back to the
+ * end of the active queue to get it out
+ * of the way
+ */
+ queue_remove(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+
+ vm_pageout_scan_active_throttled++;
+
+ goto done_with_activepage;
+ }
+ }
+ vm_pageout_scan_active_throttle_success++;
+ need_internal_inactive--;
+ }
/*
* Deactivate the page while holding the object
* locked, so we know the page is still not busy.
* absent or fictitious, but vm_page_deactivate
* can handle that.
*/
-
vm_page_deactivate(m);
- vm_object_unlock(object);
- }
+done_with_activepage:
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
- /*
- * We are done if we have met our target *and*
- * nobody is still waiting for a page.
- */
- if (vm_page_free_count >= vm_page_free_target) {
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
+ }
+ if (local_freeq) {
+ vm_page_free_list(local_freeq);
+
+ local_freeq = 0;
+ local_freed = 0;
+ }
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+
+ mutex_pause();
+ vm_page_lock_queues();
+ /*
+ * continue the while loop processing
+ * the active queue... need to hold
+ * the page queues lock
+ */
+ continue;
+ }
+ }
+
+
+
+ /**********************************************************************
+ * above this point we're playing with the active queue
+ * below this point we're playing with the throttling mechanisms
+ * and the inactive queue
+ **********************************************************************/
+
+
+
+ /*
+ * We are done if we have met our target *and*
+ * nobody is still waiting for a page.
+ */
+ if (vm_page_free_count + local_freed >= vm_page_free_target) {
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
+ }
+ if (local_freeq) {
+ vm_page_free_list(local_freeq);
+
+ local_freeq = 0;
+ local_freed = 0;
+ }
mutex_lock(&vm_page_queue_free_lock);
+
if ((vm_page_free_count >= vm_page_free_target) &&
(vm_page_free_wanted == 0)) {
+
vm_page_unlock_queues();
- break;
+
+ thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ return;
}
mutex_unlock(&vm_page_queue_free_lock);
}
+
+
/*
* Sometimes we have to pause:
* 1) No inactive pages - nothing to do.
- * 2) Flow control - wait for untrusted pagers to catch up.
+ * 2) Flow control - default pageout queue is full
+ * 3) Loop control - no acceptable pages found on the inactive queue
+ * within the last vm_pageout_burst_inactive_throttle iterations
*/
+ if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
+ vm_pageout_scan_empty_throttle++;
+ msecs = vm_pageout_empty_wait;
+ goto vm_pageout_scan_delay;
+
+ } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
+ vm_pageout_scan_burst_throttle++;
+ msecs = vm_pageout_burst_wait;
+ goto vm_pageout_scan_delay;
+
+ } else if (VM_PAGE_Q_THROTTLED(iq)) {
+
+ switch (flow_control.state) {
+
+ case FCS_IDLE:
+reset_deadlock_timer:
+ ts.tv_sec = vm_pageout_deadlock_wait / 1000;
+ ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
+ clock_get_system_nanotime(
+ &flow_control.ts.tv_sec,
+ (uint32_t *) &flow_control.ts.tv_nsec);
+ ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
+
+ flow_control.state = FCS_DELAYED;
+ msecs = vm_pageout_deadlock_wait;
+
+ break;
+
+ case FCS_DELAYED:
+ clock_get_system_nanotime(
+ &ts.tv_sec,
+ (uint32_t *) &ts.tv_nsec);
+
+ if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
+ /*
+ * the pageout thread for the default pager is potentially
+ * deadlocked since the
+ * default pager queue has been throttled for more than the
+ * allowable time... we need to move some clean pages or dirty
+ * pages belonging to the external pagers if they aren't throttled
+ * vm_page_free_wanted represents the number of threads currently
+ * blocked waiting for pages... we'll move one page for each of
+ * these plus a fixed amount to break the logjam... once we're done
+ * moving this number of pages, we'll re-enter the FCS_DELAYED state
+ * with a new timeout target since we have no way of knowing
+ * whether we've broken the deadlock except through observation
+ * of the queue associated with the default pager... we need to
+ * stop moving pages and allow the system to run to see what
+ * state it settles into.
+ */
+ vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
+ vm_pageout_scan_deadlock_detected++;
+ flow_control.state = FCS_DEADLOCK_DETECTED;
- if ((queue_empty(&vm_page_queue_inactive) &&
- (queue_empty(&vm_page_queue_zf))) ||
- ((--loop_detect) == 0) ||
- (burst_count >= vm_pageout_burst_max)) {
- unsigned int pages, msecs;
- int wait_result;
+ thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ goto consider_inactive;
+ }
+ /*
+ * just resniff instead of trying
+ * to compute a new delay time... we're going to be
+ * awakened immediately upon a laundry completion,
+ * so we won't wait any longer than necessary
+ */
+ msecs = vm_pageout_idle_wait;
+ break;
- consider_machine_adjust();
- /*
- * vm_pageout_burst_wait is msecs/page.
- * If there is nothing for us to do, we wait
- * at least vm_pageout_empty_wait msecs.
- */
- pages = burst_count;
-
- if (loop_detect == 0) {
- printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
- msecs = vm_free_page_pause;
+ case FCS_DEADLOCK_DETECTED:
+ if (vm_pageout_deadlock_target)
+ goto consider_inactive;
+ goto reset_deadlock_timer;
+
+ }
+ vm_pageout_scan_throttle++;
+ iq->pgo_throttled = TRUE;
+vm_pageout_scan_delay:
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
}
- else {
- msecs = burst_count * vm_pageout_burst_wait;
+ if (local_freeq) {
+ vm_page_free_list(local_freeq);
+
+ local_freeq = 0;
+ local_freed = 0;
}
+ assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
+
+ counter(c_vm_pageout_scan_block++);
- if (queue_empty(&vm_page_queue_inactive) &&
- queue_empty(&vm_page_queue_zf) &&
- (msecs < vm_pageout_empty_wait))
- msecs = vm_pageout_empty_wait;
vm_page_unlock_queues();
+
+ thread_block(THREAD_CONTINUE_NULL);
- assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
- counter(c_vm_pageout_scan_block++);
+ vm_page_lock_queues();
+ delayed_unlock = 1;
- /*
- * Unfortunately, we don't have call_continuation
- * so we can't rely on tail-recursion.
- */
- wait_result = thread_block((void (*)(void)) 0);
- if (wait_result != THREAD_TIMED_OUT)
- thread_cancel_timer();
- vm_pageout_scan_continue();
+ iq->pgo_throttled = FALSE;
+
+ if (loop_count >= vm_page_inactive_count) {
+ if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
+ /*
+ * Make sure we move enough "appropriate"
+ * pages to the inactive queue before trying
+ * again.
+ */
+ need_internal_inactive = vm_pageout_inactive_relief;
+ }
+ loop_count = 0;
+ }
+ inactive_burst_count = 0;
goto Restart;
/*NOTREACHED*/
}
+
+ flow_control.state = FCS_IDLE;
+consider_inactive:
+ loop_count++;
+ inactive_burst_count++;
vm_pageout_inactive++;
+ if (!queue_empty(&vm_page_queue_inactive)) {
+ m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+
+ if (m->clustered && (m->no_isync == TRUE)) {
+ goto use_this_page;
+ }
+ }
if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
vm_zf_iterator = 0;
} else {
vm_zf_iterator = 0;
}
}
- if(queue_empty(&vm_page_queue_zf) ||
+ if (queue_empty(&vm_page_queue_zf) ||
(((last_page_zf) || (vm_zf_iterator == 0)) &&
!queue_empty(&vm_page_queue_inactive))) {
m = (vm_page_t) queue_first(&vm_page_queue_inactive);
m = (vm_page_t) queue_first(&vm_page_queue_zf);
last_page_zf = 1;
}
+use_this_page:
+ assert(!m->active && m->inactive);
+ assert(!m->laundry);
+ assert(m->object != kernel_object);
- if ((vm_page_free_count <= vm_page_free_reserved) &&
- (IP_VALID(memory_manager_default))) {
- /*
- * We're really low on memory. Try to select a page that
- * would go directly to the default_pager.
- * If there are no such pages, we have to page out a
- * page backed by an EMM, so that the default_pager
- * can recover it eventually.
- */
- vm_pageout_scan_inactive_emm_throttle++;
- do {
- assert(!m->active && m->inactive);
- object = m->object;
-
- if (vm_object_lock_try(object)) {
-#if 0
- if (object->pager_trusted ||
- object->internal) {
- /* found one ! */
- vm_pageout_scan_inactive_emm_throttle_success++;
- goto object_locked_inactive;
- }
-#else
- vm_pageout_scan_inactive_emm_throttle_success++;
- goto object_locked_inactive;
-#endif /* 0 */
- vm_object_unlock(object);
+ /*
+ * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run... otherwise, we're likely to trip over this
+ * object in the same state as we work our way through
+ * the queue... clumps of pages associated with the same
+ * object are fairly typical on the inactive and active queues
+ */
+ if (m->object != object) {
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
+ }
+ if (!vm_object_lock_try(m->object)) {
+ /*
+ * Move page to end and continue.
+ * Don't re-issue ticket
+ */
+ if (m->zero_fill) {
+ queue_remove(&vm_page_queue_zf, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_zf, m,
+ vm_page_t, pageq);
+ } else {
+ queue_remove(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
}
- m = (vm_page_t) queue_next(&m->pageq);
- } while ((!queue_end(&vm_page_queue_zf,
- (queue_entry_t) m))
- && (!queue_end(&vm_page_queue_inactive,
- (queue_entry_t) m)));
-
- if ((queue_end(&vm_page_queue_zf,
- (queue_entry_t) m))
- || (queue_end(&vm_page_queue_inactive,
- (queue_entry_t) m))) {
- vm_pageout_scan_inactive_emm_throttle_failure++;
+ vm_pageout_inactive_nolock++;
+
/*
- * We should check the "active" queue
- * for good candidates to page out.
+ * force us to dump any collected free pages
+ * and to pause before moving on
*/
- need_more_inactive_pages = TRUE;
+ delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;
- if(last_page_zf == 0) {
- last_page_zf = 1;
- vm_zf_iterator = vm_zf_iterator_count - 1;
- } else {
- last_page_zf = 0;
- vm_zf_iterator = vm_zf_iterator_count - 2;
- }
- vm_page_unlock_queues();
- goto Restart;
+ goto done_with_inactivepage;
}
+ object = m->object;
}
-
- assert(!m->active && m->inactive);
- object = m->object;
-
/*
- * Try to lock object; since we've got the
- * page queues lock, we can only try for this one.
+ * If the page belongs to a purgable object with no pending copies
+ * against it, then we reap all of the pages in the object
+ * and note that the object has been "emptied". It'll be up to the
+ * application to discover this and recreate its contents if desired.
*/
+ if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
+ object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
+ object->copy == VM_OBJECT_NULL) {
- if (!vm_object_lock_try(object)) {
+ (void) vm_object_purge(object);
+ vm_pageout_purged_objects++;
/*
- * Move page to end and continue.
- * Don't re-issue ticket
+ * we've just taken all of the pages from this object,
+ * so drop the lock now since we're not going to find
+ * any more pages belonging to it anytime soon
*/
- if(m->zero_fill) {
- queue_remove(&vm_page_queue_zf, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_zf, m,
- vm_page_t, pageq);
- } else {
- queue_remove(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- }
- vm_page_unlock_queues();
+ vm_object_unlock(object);
+ object = NULL;
- mutex_pause();
- vm_pageout_inactive_nolock++;
- continue;
+ inactive_burst_count = 0;
+
+ goto done_with_inactivepage;
}
- object_locked_inactive:
/*
- * Paging out pages of objects which pager is being
- * created by another thread must be avoided, because
- * this thread may claim for memory, thus leading to a
- * possible dead lock between it and the pageout thread
- * which will wait for pager creation, if such pages are
- * finally chosen. The remaining assumption is that there
- * will finally be enough available pages in the inactive
- * pool to page out in order to satisfy all memory claimed
- * by the thread which concurrently creates the pager.
+ * Paging out pages of external objects which
+ * are currently being created must be avoided.
+ * The pager may claim for memory, thus leading to a
+ * possible dead lock between it and the pageout thread,
+ * if such pages are finally chosen. The remaining assumption
+ * is that there will finally be enough available pages in the
+ * inactive pool to page out in order to satisfy all memory
+ * claimed by the thread which concurrently creates the pager.
*/
-
if (!object->pager_initialized && object->pager_created) {
/*
* Move page to end and continue, hoping that
* one of its logically adjacent fellows is
* targeted.
*/
- if(m->zero_fill) {
+ if (m->zero_fill) {
queue_remove(&vm_page_queue_zf, m,
vm_page_t, pageq);
queue_enter(&vm_page_queue_zf, m,
last_page_zf = 0;
vm_zf_iterator = 1;
}
- vm_page_unlock_queues();
- vm_object_unlock(object);
vm_pageout_inactive_avoid++;
- continue;
- }
+ goto done_with_inactivepage;
+ }
/*
* Remove the page from the inactive list.
*/
-
- if(m->zero_fill) {
+ if (m->zero_fill) {
queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
} else {
queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
}
+ m->pageq.next = NULL;
+ m->pageq.prev = NULL;
m->inactive = FALSE;
if (!m->fictitious)
vm_page_inactive_count--;
* Somebody is already playing with this page.
* Leave it off the pageout queues.
*/
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
vm_pageout_inactive_busy++;
- continue;
+
+ goto done_with_inactivepage;
}
/*
if (m->absent || m->error) {
vm_pageout_inactive_absent++;
- reclaim_page:
- vm_page_free(m);
- vm_page_unlock_queues();
- vm_object_unlock(object);
- continue;
+reclaim_page:
+ if (vm_pageout_deadlock_target) {
+ vm_pageout_scan_inactive_throttle_success++;
+ vm_pageout_deadlock_target--;
+ }
+ if (m->tabled)
+ vm_page_remove(m); /* clears tabled, object, offset */
+ if (m->absent)
+ vm_object_absent_release(object);
+
+ assert(m->pageq.next == NULL &&
+ m->pageq.prev == NULL);
+ m->pageq.next = (queue_entry_t)local_freeq;
+ local_freeq = m;
+ local_freed++;
+
+ inactive_burst_count = 0;
+
+ goto done_with_inactivepage;
}
assert(!m->private);
*/
if (m->cleaning) {
-#if MACH_CLUSTER_STATS
- vm_pageout_cluster_conversions++;
-#endif
m->busy = TRUE;
m->pageout = TRUE;
m->dump_cleaning = TRUE;
vm_page_wire(m);
- vm_object_unlock(object);
- vm_page_unlock_queues();
- continue;
+
+ CLUSTER_STAT(vm_pageout_cluster_conversions++);
+
+ inactive_burst_count = 0;
+
+ goto done_with_inactivepage;
}
/*
* If it's being used, reactivate.
* (Fictitious pages are either busy or absent.)
*/
-
- if (m->reference || pmap_is_referenced(m->phys_page)) {
- vm_pageout_inactive_used++;
- reactivate_page:
-#if ADVISORY_PAGEOUT
- if (m->discard_request) {
- m->discard_request = FALSE;
- }
-#endif /* ADVISORY_PAGEOUT */
- last_page_zf = 0;
- vm_object_unlock(object);
+ if ( (!m->reference) ) {
+ refmod_state = pmap_get_refmod(m->phys_page);
+
+ if (refmod_state & VM_MEM_REFERENCED)
+ m->reference = TRUE;
+ if (refmod_state & VM_MEM_MODIFIED)
+ m->dirty = TRUE;
+ }
+ if (m->reference) {
+was_referenced:
vm_page_activate(m);
VM_STAT(reactivations++);
- vm_page_unlock_queues();
- continue;
- }
-#if ADVISORY_PAGEOUT
- if (object->advisory_pageout) {
- boolean_t do_throttle;
- memory_object_t pager;
- vm_object_offset_t discard_offset;
-
- if (m->discard_request) {
- vm_stat_discard_failure++;
- goto mandatory_pageout;
- }
+ vm_pageout_inactive_used++;
+ last_page_zf = 0;
+ inactive_burst_count = 0;
- assert(object->pager_initialized);
- m->discard_request = TRUE;
- pager = object->pager;
+ goto done_with_inactivepage;
+ }
- /* system-wide throttle */
- do_throttle = (vm_page_free_count <=
- vm_page_free_reserved);
+ XPR(XPR_VM_PAGEOUT,
+ "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
+ (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
-#if 0
- /*
- * JMM - Do we need a replacement throttle
- * mechanism for pagers?
- */
- if (!do_throttle) {
- /* throttle on this pager */
- /* XXX lock ordering ? */
- ip_lock(port);
- do_throttle= imq_full(&port->ip_messages);
- ip_unlock(port);
+ /*
+ * we've got a candidate page to steal...
+ *
+ * m->dirty is up to date courtesy of the
+ * preceding check for m->reference... if
+ * we get here, then m->reference had to be
+ * FALSE which means we did a pmap_get_refmod
+ * and updated both m->reference and m->dirty
+ *
+ * if it's dirty or precious we need to
+ * see if the target queue is throttled
+ * if it is, we need to skip over it by moving it back
+ * to the end of the inactive queue
+ */
+ inactive_throttled = FALSE;
+
+ if (m->dirty || m->precious) {
+ if (object->internal) {
+ if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
+ inactive_throttled = TRUE;
+ } else if (VM_PAGE_Q_THROTTLED(eq)) {
+ inactive_throttled = TRUE;
}
-#endif
-
- if (do_throttle) {
- vm_stat_discard_throttle++;
-#if 0
- /* ignore this page and skip to next */
- vm_page_unlock_queues();
- vm_object_unlock(object);
- continue;
-#else
- /* force mandatory pageout */
- goto mandatory_pageout;
-#endif
+ }
+ if (inactive_throttled == TRUE) {
+ if (m->zero_fill) {
+ queue_enter(&vm_page_queue_zf, m,
+ vm_page_t, pageq);
+ } else {
+ queue_enter(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
}
+ if (!m->fictitious)
+ vm_page_inactive_count++;
+ m->inactive = TRUE;
- /* proceed with discard_request */
- vm_page_activate(m);
- vm_stat_discard++;
- VM_STAT(reactivations++);
- discard_offset = m->offset + object->paging_offset;
- vm_stat_discard_sent++;
- vm_page_unlock_queues();
- vm_object_unlock(object);
+ vm_pageout_scan_inactive_throttled++;
-/*
- memory_object_discard_request(object->pager,
- discard_offset,
- PAGE_SIZE);
-*/
- continue;
+ goto done_with_inactivepage;
}
- mandatory_pageout:
-#endif /* ADVISORY_PAGEOUT */
-
- XPR(XPR_VM_PAGEOUT,
- "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
- (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
-
/*
- * Eliminate all mappings.
+ * we've got a page that we can steal...
+ * eliminate all mappings and make sure
+ * we have the up-to-date modified state
+ * first take the page BUSY, so that no new
+ * mappings can be made
*/
-
m->busy = TRUE;
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
+
+ /*
+ * if we need to do a pmap_disconnect then we
+ * need to re-evaluate m->dirty since the pmap_disconnect
+ * provides the true state atomically... the
+ * page was still mapped up to the pmap_disconnect
+ * and may have been dirtied at the last microsecond
+ *
+ * we also check for the page being referenced 'late'
+ * if it was, we first need to do a WAKEUP_DONE on it
+ * since we already set m->busy = TRUE, before
+ * going off to reactivate it
+ *
+ * if we don't need the pmap_disconnect, then
+ * m->dirty is up to date courtesy of the
+ * earlier check for m->reference... if
+ * we get here, then m->reference had to be
+ * FALSE which means we did a pmap_get_refmod
+ * and updated both m->reference and m->dirty...
+ */
+ if (m->no_isync == FALSE) {
+ refmod_state = pmap_disconnect(m->phys_page);
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_page);
+ if (refmod_state & VM_MEM_MODIFIED)
+ m->dirty = TRUE;
+ if (refmod_state & VM_MEM_REFERENCED) {
+ m->reference = TRUE;
+
+ PAGE_WAKEUP_DONE(m);
+ goto was_referenced;
+ }
+ }
/*
* If it's clean and not precious, we can free the page.
*/
-
if (!m->dirty && !m->precious) {
vm_pageout_inactive_clean++;
goto reclaim_page;
}
- vm_page_unlock_queues();
-
- /*
- * If there is no memory object for the page, create
- * one and hand it to the default pager.
- */
+ vm_pageout_cluster(m);
- if (!object->pager_initialized)
- vm_object_collapse(object);
- if (!object->pager_initialized)
- vm_object_pager_create(object);
- if (!object->pager_initialized) {
- /*
- * Still no pager for the object.
- * Reactivate the page.
- *
- * Should only happen if there is no
- * default pager.
- */
- vm_page_lock_queues();
- vm_page_activate(m);
- vm_page_unlock_queues();
+ vm_pageout_inactive_dirty++;
- /*
- * And we are done with it.
- */
- PAGE_WAKEUP_DONE(m);
- vm_object_unlock(object);
+ inactive_burst_count = 0;
- /*
- * break here to get back to the preemption
- * point in the outer loop so that we don't
- * spin forever if there is no default pager.
- */
- vm_pageout_dirty_no_pager++;
- /*
- * Well there's no pager, but we can still reclaim
- * free pages out of the inactive list. Go back
- * to top of loop and look for suitable pages.
- */
- continue;
- }
+done_with_inactivepage:
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
- if ((object->pager_initialized) &&
- (object->pager == MEMORY_OBJECT_NULL)) {
- /*
- * This pager has been destroyed by either
- * memory_object_destroy or vm_object_destroy, and
- * so there is nowhere for the page to go.
- * Just free the page.
- */
- VM_PAGE_FREE(m);
- vm_object_unlock(object);
- continue;
+ if (object != NULL) {
+ vm_object_unlock(object);
+ object = NULL;
+ }
+ if (local_freeq) {
+ vm_page_free_list(local_freeq);
+
+ local_freeq = 0;
+ local_freed = 0;
+ }
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ mutex_pause();
}
-
- vm_pageout_inactive_dirty++;
-/*
- if (!object->internal)
- burst_count++;
-*/
- vm_object_paging_begin(object);
- vm_object_unlock(object);
- vm_pageout_cluster(m); /* flush it */
+ /*
+ * back to top of pageout scan loop
+ */
}
- consider_machine_adjust();
}
-counter(unsigned int c_vm_pageout_scan_continue = 0;)
-
-void
-vm_pageout_scan_continue(void)
-{
- /*
- * We just paused to let the pagers catch up.
- * If vm_page_laundry_count is still high,
- * then we aren't waiting long enough.
- * If we have paused some vm_pageout_pause_max times without
- * adjusting vm_pageout_burst_wait, it might be too big,
- * so we decrease it.
- */
-
- vm_page_lock_queues();
- counter(++c_vm_pageout_scan_continue);
- if (vm_page_laundry_count > vm_pageout_burst_min) {
- vm_pageout_burst_wait++;
- vm_pageout_pause_count = 0;
- } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
- vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
- if (vm_pageout_burst_wait < 1)
- vm_pageout_burst_wait = 1;
- vm_pageout_pause_count = 0;
- }
- vm_page_unlock_queues();
-}
-void vm_page_free_reserve(int pages);
int vm_page_free_count_init;
void
* vm_pageout is the high level pageout daemon.
*/
-
void
-vm_pageout(void)
+vm_pageout_continue(void)
{
- thread_t self = current_thread();
- spl_t s;
+ vm_pageout_scan_event_counter++;
+ vm_pageout_scan();
+ /* we hold vm_page_queue_free_lock now */
+ assert(vm_page_free_wanted == 0);
+ assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
+ mutex_unlock(&vm_page_queue_free_lock);
- /*
- * Set thread privileges.
- */
- self->vm_privilege = TRUE;
- stack_privilege(self);
+ counter(c_vm_pageout_block++);
+ thread_block((thread_continue_t)vm_pageout_continue);
+ /*NOTREACHED*/
+}
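+
+/*
+ * vm_pageout_continue replaces the old for(;;) loop in vm_pageout():
+ * each pass runs vm_pageout_scan (which returns holding
+ * vm_page_queue_free_lock), then blocks on &vm_page_free_wanted with
+ * itself as the continuation, so the daemon restarts here on the next
+ * wakeup of that event.
+ */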
- s = splsched();
- thread_lock(self);
- self->priority = BASEPRI_PREEMPT - 1;
- set_sched_pri(self, self->priority);
- thread_unlock(self);
- splx(s);
- /*
- * Initialize some paging parameters.
- */
+/*
+ * must be called with the
+ * queues and object locks held
+ */
+static void
+vm_pageout_queue_steal(vm_page_t m)
+{
+ struct vm_pageout_queue *q;
- if (vm_page_laundry_max == 0)
- vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
+ if (m->object->internal == TRUE)
+ q = &vm_pageout_queue_internal;
+ else
+ q = &vm_pageout_queue_external;
- if (vm_pageout_burst_max == 0)
- vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
+ m->laundry = FALSE;
+ m->pageout_queue = FALSE;
+ queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
- if (vm_pageout_burst_wait == 0)
- vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
+ m->pageq.next = NULL;
+ m->pageq.prev = NULL;
- if (vm_pageout_empty_wait == 0)
- vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
+ vm_object_paging_end(m->object);
- vm_page_free_count_init = vm_page_free_count;
- vm_zf_iterator = 0;
- /*
- * even if we've already called vm_page_free_reserve
- * call it again here to insure that the targets are
- * accurately calculated (it uses vm_page_free_count_init)
- * calling it with an arg of 0 will not change the reserve
- * but will re-calculate free_min and free_target
- */
- if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED)
- vm_page_free_reserve(VM_PAGE_FREE_RESERVED - vm_page_free_reserved);
- else
- vm_page_free_reserve(0);
-
- /*
- * vm_pageout_scan will set vm_page_inactive_target.
- *
- * The pageout daemon is never done, so loop forever.
- * We should call vm_pageout_scan at least once each
- * time we are woken, even if vm_page_free_wanted is
- * zero, to check vm_page_free_target and
- * vm_page_inactive_target.
- */
- for (;;) {
- vm_pageout_scan_event_counter++;
- vm_pageout_scan();
- /* we hold vm_page_queue_free_lock now */
- assert(vm_page_free_wanted == 0);
- assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
- mutex_unlock(&vm_page_queue_free_lock);
- counter(c_vm_pageout_block++);
- thread_block((void (*)(void)) 0);
- }
- /*NOTREACHED*/
+ q->pgo_laundry--;
}
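+
+/*
+ * vm_pageout_queue_steal pulls a page back out of the pageout
+ * machinery before the I/O thread has picked it up: it clears laundry
+ * and pageout_queue, unlinks the page from pgo_pending, drops the
+ * paging reference taken in vm_pageout_cluster and lowers
+ * pgo_laundry. Callers (outside this excerpt) must hold the page
+ * queues and object locks, per the header comment above.
+ */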
-kern_return_t
-vm_pageout_emergency_availability_request()
+
+#ifdef FAKE_DEADLOCK
+
+#define FAKE_COUNT 5000
+
+int internal_count = 0;
+int fake_deadlock = 0;
+
+#endif
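+
+/*
+ * FAKE_DEADLOCK is a debug aid: when built with it, the internal
+ * I/O thread below allocates (and immediately frees)
+ * vm_page_free_count + vm_page_free_reserved pages worth of kernel
+ * memory every FAKE_COUNT internal pageouts, to provoke the
+ * deadlock-detection path (FCS_DEADLOCK_DETECTED) in vm_pageout_scan.
+ */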
+
+static void
+vm_pageout_iothread_continue(struct vm_pageout_queue *q)
{
- vm_page_t m;
+ vm_page_t m = NULL;
vm_object_t object;
+ boolean_t need_wakeup;
vm_page_lock_queues();
- m = (vm_page_t) queue_first(&vm_page_queue_inactive);
- while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) {
- if(m->fictitious) {
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_page);
- if(m->dirty || m->busy || m->wire_count || m->absent
- || m->precious || m->cleaning
- || m->dump_cleaning || m->error
- || m->pageout || m->laundry
- || m->list_req_pending
- || m->overwriting) {
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- object = m->object;
+ while ( !queue_empty(&q->pgo_pending) ) {
- if (vm_object_lock_try(object)) {
- if((!object->alive) ||
- (object->pageout)) {
- vm_object_unlock(object);
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- m->busy = TRUE;
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
- vm_page_free(m);
- vm_object_unlock(object);
- vm_page_unlock_queues();
- return KERN_SUCCESS;
- }
- m = (vm_page_t) queue_next(&m->pageq);
- }
+ q->pgo_busy = TRUE;
+ queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
+ m->pageout_queue = FALSE;
+ vm_page_unlock_queues();
- m = (vm_page_t) queue_first(&vm_page_queue_active);
+ m->pageq.next = NULL;
+ m->pageq.prev = NULL;
+#ifdef FAKE_DEADLOCK
+ if (q == &vm_pageout_queue_internal) {
+ vm_offset_t addr;
+ int pg_count;
- while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)) {
- if(m->fictitious) {
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_page);
- if(m->dirty || m->busy || m->wire_count || m->absent
- || m->precious || m->cleaning
- || m->dump_cleaning || m->error
- || m->pageout || m->laundry
- || m->list_req_pending
- || m->overwriting) {
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- object = m->object;
+ internal_count++;
- if (vm_object_lock_try(object)) {
- if((!object->alive) ||
- (object->pageout)) {
- vm_object_unlock(object);
- m = (vm_page_t) queue_next(&m->pageq);
- continue;
- }
- m->busy = TRUE;
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
- vm_page_free(m);
- vm_object_unlock(object);
- vm_page_unlock_queues();
- return KERN_SUCCESS;
- }
- m = (vm_page_t) queue_next(&m->pageq);
+			if (internal_count == FAKE_COUNT) {
+
+ pg_count = vm_page_free_count + vm_page_free_reserved;
+
+ if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
+ kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
+ }
+ internal_count = 0;
+ fake_deadlock++;
+ }
+ }
+#endif
+ object = m->object;
+
+ if (!object->pager_initialized) {
+ vm_object_lock(object);
+
+ /*
+ * If there is no memory object for the page, create
+ * one and hand it to the default pager.
+ */
+
+ if (!object->pager_initialized)
+ vm_object_collapse(object,
+ (vm_object_offset_t) 0,
+ TRUE);
+ if (!object->pager_initialized)
+ vm_object_pager_create(object);
+ if (!object->pager_initialized) {
+ /*
+ * Still no pager for the object.
+ * Reactivate the page.
+ *
+ * Should only happen if there is no
+ * default pager.
+ */
+ m->list_req_pending = FALSE;
+ m->cleaning = FALSE;
+ m->pageout = FALSE;
+ vm_page_unwire(m);
+
+ vm_pageout_throttle_up(m);
+
+ vm_page_lock_queues();
+ vm_pageout_dirty_no_pager++;
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+
+ /*
+ * And we are done with it.
+ */
+ PAGE_WAKEUP_DONE(m);
+
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_page_lock_queues();
+ continue;
+ } else if (object->pager == MEMORY_OBJECT_NULL) {
+ /*
+ * This pager has been destroyed by either
+ * memory_object_destroy or vm_object_destroy, and
+ * so there is nowhere for the page to go.
+ * Just free the page... VM_PAGE_FREE takes
+ * care of cleaning up all the state...
+ * including doing the vm_pageout_throttle_up
+ */
+ VM_PAGE_FREE(m);
+
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_page_lock_queues();
+ continue;
+ }
+ vm_object_unlock(object);
+ }
+ /*
+ * we expect the paging_in_progress reference to have
+ * already been taken on the object before it was added
+ * to the appropriate pageout I/O queue... this will
+ * keep the object from being terminated and/or the
+ * paging_offset from changing until the I/O has
+ * completed... therefore no need to lock the object to
+ * pull the paging_offset from it.
+ *
+ * Send the data to the pager.
+ * any pageout clustering happens there
+ */
+ memory_object_data_return(object->pager,
+ m->offset + object->paging_offset,
+ PAGE_SIZE,
+ NULL,
+ NULL,
+ FALSE,
+ FALSE,
+ 0);
+
+ vm_object_lock(object);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_page_lock_queues();
}
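+	/*
+	 * The pending queue is empty: prepare to wait for more work.
+	 * If the queue had been throttled and its laundry count has
+	 * dropped back under the limit, wake whoever is sleeping on
+	 * pgo_laundry (typically vm_pageout_scan), then mark this
+	 * iothread idle and block; we resume at the top of
+	 * vm_pageout_iothread_continue when new pages are queued.
+	 */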
+ assert_wait((event_t) q, THREAD_UNINT);
+
+
+ if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
+ q->pgo_throttled = FALSE;
+ need_wakeup = TRUE;
+ } else
+ need_wakeup = FALSE;
+
+ q->pgo_busy = FALSE;
+ q->pgo_idle = TRUE;
vm_page_unlock_queues();
- return KERN_FAILURE;
+
+ if (need_wakeup == TRUE)
+ thread_wakeup((event_t) &q->pgo_laundry);
+
+ thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
+ /*NOTREACHED*/
+}
+
+
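+/*
+ * Startup entry points for the two pageout I/O threads.  Each simply
+ * parks itself in vm_pageout_iothread_continue() on its queue; the
+ * internal thread additionally marks itself VM-privileged
+ * (TH_OPT_VMPRIV) so it can dip into the reserved page pool while
+ * pushing pages to the default pager.
+ */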
+static void
+vm_pageout_iothread_external(void)
+{
+
+ vm_pageout_iothread_continue(&vm_pageout_queue_external);
+ /*NOTREACHED*/
+}
+
+
+static void
+vm_pageout_iothread_internal(void)
+{
+ thread_t self = current_thread();
+
+ self->options |= TH_OPT_VMPRIV;
+
+ vm_pageout_iothread_continue(&vm_pageout_queue_internal);
+ /*NOTREACHED*/
+}
+
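+/*
+ * Garbage-collection thread: when woken with a non-zero argument it
+ * reclaims kernel stacks, lets the machine layer release what it can,
+ * and runs the zone garbage collector (last, since the other steps may
+ * return memory to zones), then blocks again with the argument set to 1
+ * so every subsequent wakeup collects.
+ */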
+static void
+vm_pageout_garbage_collect(int collect)
+{
+ if (collect) {
+ stack_collect();
+
+ /*
+ * consider_zone_gc should be last, because the other operations
+ * might return memory to zones.
+ */
+ consider_machine_collect();
+ consider_zone_gc();
+
+ consider_machine_adjust();
+ }
+
+ assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
+
+ thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
+ /*NOTREACHED*/
+}
+
+
+
+void
+vm_pageout(void)
+{
+ thread_t self = current_thread();
+ thread_t thread;
+ kern_return_t result;
+ spl_t s;
+
+ /*
+ * Set thread privileges.
+ */
+ s = splsched();
+ thread_lock(self);
+ self->priority = BASEPRI_PREEMPT - 1;
+ set_sched_pri(self, self->priority);
+ thread_unlock(self);
+ splx(s);
+
+ /*
+ * Initialize some paging parameters.
+ */
+
+ if (vm_pageout_idle_wait == 0)
+ vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
+
+ if (vm_pageout_burst_wait == 0)
+ vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
+
+ if (vm_pageout_empty_wait == 0)
+ vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
+
+ if (vm_pageout_deadlock_wait == 0)
+ vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
+
+ if (vm_pageout_deadlock_relief == 0)
+ vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
+
+ if (vm_pageout_inactive_relief == 0)
+ vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
+
+ if (vm_pageout_burst_active_throttle == 0)
+ vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
+
+ if (vm_pageout_burst_inactive_throttle == 0)
+ vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
+
+ /*
+ * Set kernel task to low backing store privileged
+ * status
+ */
+ task_lock(kernel_task);
+ kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
+ task_unlock(kernel_task);
+
+ vm_page_free_count_init = vm_page_free_count;
+ vm_zf_iterator = 0;
+ /*
+ * even if we've already called vm_page_free_reserve
+	 * call it again here to ensure that the targets are
+ * accurately calculated (it uses vm_page_free_count_init)
+ * calling it with an arg of 0 will not change the reserve
+ * but will re-calculate free_min and free_target
+ */
+ if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
+ vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
+ } else
+ vm_page_free_reserve(0);
+
+
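+	/*
+	 * Set up the two pageout I/O queues: external for file-backed
+	 * objects, internal for anonymous memory bound for the default
+	 * pager.  Both start empty and unthrottled, with a laundry limit
+	 * of VM_PAGE_LAUNDRY_MAX.
+	 */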
+ queue_init(&vm_pageout_queue_external.pgo_pending);
+ vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
+ vm_pageout_queue_external.pgo_laundry = 0;
+ vm_pageout_queue_external.pgo_idle = FALSE;
+ vm_pageout_queue_external.pgo_busy = FALSE;
+ vm_pageout_queue_external.pgo_throttled = FALSE;
+
+ queue_init(&vm_pageout_queue_internal.pgo_pending);
+ vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
+ vm_pageout_queue_internal.pgo_laundry = 0;
+ vm_pageout_queue_internal.pgo_idle = FALSE;
+ vm_pageout_queue_internal.pgo_busy = FALSE;
+ vm_pageout_queue_internal.pgo_throttled = FALSE;
+
+
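+	/*
+	 * Launch the helper threads: the internal and external pageout
+	 * I/O threads run at this thread's priority (BASEPRI_PREEMPT - 1),
+	 * the garbage collector one notch lower.
+	 */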
+ result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
+ if (result != KERN_SUCCESS)
+ panic("vm_pageout_iothread_internal: create failed");
+
+ thread_deallocate(thread);
+
+
+ result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
+ if (result != KERN_SUCCESS)
+ panic("vm_pageout_iothread_external: create failed");
+
+ thread_deallocate(thread);
+
+
+ result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
+ if (result != KERN_SUCCESS)
+ panic("vm_pageout_garbage_collect: create failed");
+
+ thread_deallocate(thread);
+
+ vm_object_reaper_init();
+
+ vm_pageout_continue();
+ /*NOTREACHED*/
}
static upl_t
upl_create(
- int flags,
- vm_size_t size)
+ int flags,
+ upl_size_t size)
{
upl_t upl;
int page_field_size; /* bit field in word size buf */
upl->size = 0;
upl->map_object = NULL;
upl->ref_count = 1;
+ upl->highest_page = 0;
upl_lock_init(upl);
-#ifdef UBC_DEBUG
+#ifdef UPL_DEBUG
upl->ubc_alias1 = 0;
upl->ubc_alias2 = 0;
-#endif /* UBC_DEBUG */
+#endif /* UPL_DEBUG */
return(upl);
}
{
int page_field_size; /* bit field in word size buf */
-#ifdef UBC_DEBUG
+#ifdef UPL_DEBUG
{
upl_t upl_ele;
vm_object_t object;
}
vm_object_unlock(object);
}
-#endif /* UBC_DEBUG */
+#endif /* UPL_DEBUG */
/* drop a reference on the map_object whether or */
/* not a pageout object is inserted */
if(upl->map_object->pageout)
page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
}
if(upl->flags & UPL_INTERNAL) {
- kfree((vm_offset_t)upl,
- sizeof(struct upl) +
- (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
- + page_field_size);
+ kfree(upl,
+ sizeof(struct upl) +
+ (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
+ + page_field_size);
} else {
- kfree((vm_offset_t)upl, sizeof(struct upl) + page_field_size);
+ kfree(upl, sizeof(struct upl) + page_field_size);
}
}
+void uc_upl_dealloc(upl_t upl);
__private_extern__ void
uc_upl_dealloc(
upl_t upl)
}
}
+/*
+ * Statistics about UPL enforcement of copy-on-write obligations.
+ */
+unsigned long upl_cow = 0;
+unsigned long upl_cow_again = 0;
+unsigned long upl_cow_contiguous = 0;
+unsigned long upl_cow_pages = 0;
+unsigned long upl_cow_again_pages = 0;
+unsigned long upl_cow_contiguous_pages = 0;
+
/*
* Routine: vm_object_upl_request
* Purpose:
* the vm_objects (cache objects), they support.
*
*/
+
__private_extern__ kern_return_t
vm_object_upl_request(
vm_object_t object,
- vm_object_offset_t offset,
- vm_size_t size,
+ vm_object_offset_t offset,
+ upl_size_t size,
upl_t *upl_ptr,
upl_page_info_array_t user_page_list,
unsigned int *page_list_count,
- int cntrl_flags)
+ int cntrl_flags)
{
- vm_page_t dst_page;
+ vm_page_t dst_page = VM_PAGE_NULL;
vm_object_offset_t dst_offset = offset;
- vm_size_t xfer_size = size;
+ upl_size_t xfer_size = size;
boolean_t do_m_lock = FALSE;
boolean_t dirty;
+ boolean_t hw_dirty;
upl_t upl = NULL;
- int entry;
+ unsigned int entry;
+#if MACH_CLUSTER_STATS
boolean_t encountered_lrp = FALSE;
-
+#endif
vm_page_t alias_page = NULL;
int page_ticket;
- wpl_array_t lite_list;
+ int refmod_state;
+ wpl_array_t lite_list = NULL;
+ vm_object_t last_copy_object;
+
+
+ if (cntrl_flags & ~UPL_VALID_FLAGS) {
+ /*
+ * For forward compatibility's sake,
+ * reject any unknown flag.
+ */
+ return KERN_INVALID_VALUE;
+ }
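+	/*
+	 * The page "ticket" (a small generation count) is packed into the
+	 * control flags by the caller; when gathering a PAGEOUT cluster
+	 * below, pages that are not on the inactive queue and whose ticket
+	 * neither matches nor immediately precedes this one are skipped.
+	 */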
page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
>> UPL_PAGE_TICKET_SHIFT;
if(cntrl_flags & UPL_SET_INTERNAL)
if(page_list_count != NULL)
*page_list_count = MAX_UPL_TRANSFER;
- if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
- ((page_list_count != NULL) && (*page_list_count != 0)
- && *page_list_count < (size/page_size)))
- return KERN_INVALID_ARGUMENT;
if((!object->internal) && (object->paging_offset != 0))
- panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
+ panic("vm_object_upl_request: external object with non-zero paging offset\n");
if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
return KERN_SUCCESS;
}
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ vm_object_unlock(object);
+
if(upl_ptr) {
if(cntrl_flags & UPL_SET_INTERNAL) {
if(cntrl_flags & UPL_SET_LITE) {
- vm_offset_t page_field_size;
+ uintptr_t page_field_size;
upl = upl_create(
UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
size);
user_page_list = (upl_page_info_t *)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
lite_list = (wpl_array_t)
- (((vm_offset_t)user_page_list) +
+ (((uintptr_t)user_page_list) +
((size/PAGE_SIZE) *
sizeof(upl_page_info_t)));
page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
} else {
upl = upl_create(UPL_CREATE_INTERNAL, size);
user_page_list = (upl_page_info_t *)
- (((vm_offset_t)upl)
- + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
upl->flags = UPL_INTERNAL;
}
} else {
if(cntrl_flags & UPL_SET_LITE) {
- vm_offset_t page_field_size;
+ uintptr_t page_field_size;
upl = upl_create(UPL_CREATE_LITE, size);
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
page_field_size =
(page_field_size + 3) & 0xFFFFFFFC;
}
}
- if(object->phys_contiguous) {
- upl->size = size;
- upl->offset = offset + object->paging_offset;
- *upl_ptr = upl;
- if(user_page_list) {
- user_page_list[0].phys_addr =
- (offset + object->shadow_offset)>>12;
- user_page_list[0].device = TRUE;
+ if (object->phys_contiguous) {
+ if ((cntrl_flags & UPL_WILL_MODIFY) &&
+ object->copy != VM_OBJECT_NULL) {
+ /* Honor copy-on-write obligations */
+
+ /*
+ * XXX FBDP
+ * We could still have a race...
+ * A is here building the UPL for a write().
+ * A pushes the pages to the current copy
+ * object.
+ * A returns the UPL to the caller.
+ * B comes along and establishes another
+ * private mapping on this object, inserting
+ * a new copy object between the original
+ * object and the old copy object.
+ * B reads a page and gets the original contents
+ * from the original object.
+ * A modifies the page in the original object.
+ * B reads the page again and sees A's changes,
+ * which is wrong...
+ *
+ * The problem is that the pages are not
+ * marked "busy" in the original object, so
+			 * nothing prevents B from reading it before
+			 * A's changes are completed.
+ *
+ * The "paging_in_progress" might protect us
+ * from the insertion of a new copy object
+ * though... To be verified.
+ */
+ vm_object_lock_request(object,
+ offset,
+ size,
+ FALSE,
+ MEMORY_OBJECT_COPY_SYNC,
+ VM_PROT_NO_CHANGE);
+ upl_cow_contiguous++;
+ upl_cow_contiguous_pages += size >> PAGE_SHIFT;
}
+
upl->map_object = object;
/* don't need any shadow mappings for this one */
/* since it is already I/O memory */
upl->flags |= UPL_DEVICE_MEMORY;
- vm_object_lock(object);
- vm_object_paging_begin(object);
- vm_object_unlock(object);
+
+ /* paging_in_progress protects paging_offset */
+ upl->offset = offset + object->paging_offset;
+ upl->size = size;
+ *upl_ptr = upl;
+ if(user_page_list) {
+ user_page_list[0].phys_addr =
+ (offset + object->shadow_offset)>>PAGE_SHIFT;
+ user_page_list[0].device = TRUE;
+ }
+ upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;
if(page_list_count != NULL) {
if (upl->flags & UPL_INTERNAL) {
*page_list_count = 1;
}
}
+
return KERN_SUCCESS;
}
+ if(user_page_list)
+ user_page_list[0].device = FALSE;
+
if(cntrl_flags & UPL_SET_LITE) {
upl->map_object = object;
} else {
upl->map_object = vm_object_allocate(size);
- vm_object_lock(upl->map_object);
+ /*
+			 * No need to lock the new object: nobody else knows
+ * about it yet, so it's all ours so far.
+ */
upl->map_object->shadow = object;
upl->map_object->pageout = TRUE;
upl->map_object->can_persist = FALSE;
MEMORY_OBJECT_COPY_NONE;
upl->map_object->shadow_offset = offset;
upl->map_object->wimg_bits = object->wimg_bits;
- vm_object_unlock(upl->map_object);
}
- upl->size = size;
- upl->offset = offset + object->paging_offset;
- *upl_ptr = upl;
+
}
if (!(cntrl_flags & UPL_SET_LITE)) {
VM_PAGE_GRAB_FICTITIOUS(alias_page);
}
+
+ /*
+ * ENCRYPTED SWAP:
+ * Just mark the UPL as "encrypted" here.
+ * We'll actually encrypt the pages later,
+ * in upl_encrypt(), when the caller has
+ * selected which pages need to go to swap.
+ */
+ if (cntrl_flags & UPL_ENCRYPT) {
+ upl->flags |= UPL_ENCRYPTED;
+ }
+ if (cntrl_flags & UPL_FOR_PAGEOUT) {
+ upl->flags |= UPL_PAGEOUT;
+ }
vm_object_lock(object);
-#ifdef UBC_DEBUG
- if(upl_ptr)
+
+ /* we can lock in the paging_offset once paging_in_progress is set */
+ if(upl_ptr) {
+ upl->size = size;
+ upl->offset = offset + object->paging_offset;
+ *upl_ptr = upl;
+#ifdef UPL_DEBUG
queue_enter(&object->uplq, upl, upl_t, uplq);
-#endif /* UBC_DEBUG */
- vm_object_paging_begin(object);
+#endif /* UPL_DEBUG */
+ }
+
+ if ((cntrl_flags & UPL_WILL_MODIFY) &&
+ object->copy != VM_OBJECT_NULL) {
+ /* Honor copy-on-write obligations */
+
+ /*
+ * The caller is gathering these pages and
+ * might modify their contents. We need to
+ * make sure that the copy object has its own
+ * private copies of these pages before we let
+ * the caller modify them.
+ */
+ vm_object_update(object,
+ offset,
+ size,
+ NULL,
+ NULL,
+ FALSE, /* should_return */
+ MEMORY_OBJECT_COPY_SYNC,
+ VM_PROT_NO_CHANGE);
+ upl_cow++;
+ upl_cow_pages += size >> PAGE_SHIFT;
+
+ }
+ /* remember which copy object we synchronized with */
+ last_copy_object = object->copy;
+
entry = 0;
if(cntrl_flags & UPL_COPYOUT_FROM) {
upl->flags |= UPL_PAGE_SYNC_DONE;
+
while (xfer_size) {
if((alias_page == NULL) &&
!(cntrl_flags & UPL_SET_LITE)) {
VM_PAGE_GRAB_FICTITIOUS(alias_page);
vm_object_lock(object);
}
- if(((dst_page = vm_page_lookup(object,
- dst_offset)) == VM_PAGE_NULL) ||
+ if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
dst_page->fictitious ||
dst_page->absent ||
dst_page->error ||
- (dst_page->wire_count != 0 &&
- !dst_page->pageout) ||
- ((!(dst_page->dirty || dst_page->precious ||
- pmap_is_modified(dst_page->phys_page)))
- && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
- ((!(dst_page->inactive))
- && (dst_page->page_ticket != page_ticket)
- && ((dst_page->page_ticket+1) != page_ticket)
- && (cntrl_flags & UPL_FOR_PAGEOUT)) ||
- ((!dst_page->list_req_pending) &&
- (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
- pmap_is_referenced(dst_page->phys_page))) {
- if(user_page_list) {
+ (dst_page->wire_count && !dst_page->pageout) ||
+
+ ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
+ (dst_page->page_ticket != page_ticket) &&
+ ((dst_page->page_ticket+1) != page_ticket)) ) {
+
+ if (user_page_list)
user_page_list[entry].phys_addr = 0;
- user_page_list[entry].device = FALSE;
- }
- } else {
-
- if(dst_page->busy &&
- (!(dst_page->list_req_pending &&
- dst_page->pageout))) {
- if(cntrl_flags & UPL_NOBLOCK) {
- if(user_page_list) {
+ } else {
+ /*
+ * grab this up front...
+				 * a high percentage of the time we're going to
+ * need the hardware modification state a bit later
+ * anyway... so we can eliminate an extra call into
+ * the pmap layer by grabbing it here and recording it
+ */
+ refmod_state = pmap_get_refmod(dst_page->phys_page);
+
+ if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
+ /*
+ * we're only asking for DIRTY pages to be returned
+ */
+
+ if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
+ /*
+					 * if this is the page that was stolen by vm_pageout_scan to be
+					 * cleaned (as opposed to a buddy being clustered in),
+					 * or this request is not being driven by a PAGEOUT cluster,
+					 * then we only need to check whether the page is dirty or
+					 * precious to decide whether to return it
+ */
+ if (dst_page->dirty || dst_page->precious ||
+ (refmod_state & VM_MEM_MODIFIED)) {
+ goto check_busy;
+ }
+ }
+ /*
+ * this is a request for a PAGEOUT cluster and this page
+ * is merely along for the ride as a 'buddy'... not only
+ * does it have to be dirty to be returned, but it also
+ * can't have been referenced recently... note that we've
+ * already filtered above based on whether this page is
+ * currently on the inactive queue or it meets the page
+ * ticket (generation count) check
+ */
+ if ( !(refmod_state & VM_MEM_REFERENCED) &&
+ ((refmod_state & VM_MEM_MODIFIED) ||
+ dst_page->dirty || dst_page->precious) ) {
+ goto check_busy;
+ }
+ /*
+ * if we reach here, we're not to return
+ * the page... go on to the next one
+ */
+ if (user_page_list)
+ user_page_list[entry].phys_addr = 0;
+ entry++;
+ dst_offset += PAGE_SIZE_64;
+ xfer_size -= PAGE_SIZE;
+ continue;
+ }
+check_busy:
+ if(dst_page->busy &&
+ (!(dst_page->list_req_pending &&
+ dst_page->pageout))) {
+ if(cntrl_flags & UPL_NOBLOCK) {
+ if(user_page_list) {
user_page_list[entry].phys_addr = 0;
- user_page_list[entry].device = FALSE;
}
entry++;
dst_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
continue;
}
- /*someone else is playing with the */
- /* page. We will have to wait. */
+ /*
+ * someone else is playing with the
+ * page. We will have to wait.
+ */
PAGE_SLEEP(object, dst_page, THREAD_UNINT);
continue;
}
!dst_page->list_req_pending) {
if(user_page_list) {
user_page_list[entry].phys_addr = 0;
- user_page_list[entry].device = FALSE;
}
entry++;
dst_offset += PAGE_SIZE_64;
/* original object and its prodigy */
vm_page_lock_queues();
- if( !(cntrl_flags & UPL_FILE_IO)) {
- pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
- }
+
+ if (dst_page->pageout_queue == TRUE)
+ /*
+ * we've buddied up a page for a clustered pageout
+ * that has already been moved to the pageout
+ * queue by pageout_scan... we need to remove
+ * it from the queue and drop the laundry count
+ * on that queue
+ */
+ vm_pageout_queue_steal(dst_page);
+#if MACH_CLUSTER_STATS
/* pageout statistics gathering. count */
/* all the pages we will page out that */
/* were not counted in the initial */
(pages_at_lower_offsets++;)
}
}
-
+#endif
/* Turn off busy indication on pending */
/* pageout. Note: we can only get here */
/* in the request pending case. */
dst_page->busy = FALSE;
dst_page->cleaning = FALSE;
- dirty = pmap_is_modified(dst_page->phys_page);
- dirty = dirty ? TRUE : dst_page->dirty;
+ hw_dirty = refmod_state & VM_MEM_MODIFIED;
+ dirty = hw_dirty ? TRUE : dst_page->dirty;
if(cntrl_flags & UPL_SET_LITE) {
int pg_num;
pg_num = (dst_offset-offset)/PAGE_SIZE;
lite_list[pg_num>>5] |=
1 << (pg_num & 31);
- pmap_clear_modify(dst_page->phys_page);
+ if (hw_dirty)
+ pmap_clear_modify(dst_page->phys_page);
/*
* Record that this page has been
* written out
/* use pageclean setup, it is more */
/* convenient even for the pageout */
/* cases here */
+
+ vm_object_lock(upl->map_object);
vm_pageclean_setup(dst_page,
alias_page, upl->map_object,
size - xfer_size);
+ vm_object_unlock(upl->map_object);
alias_page->absent = FALSE;
alias_page = NULL;
if(dst_page->pageout)
dst_page->busy = TRUE;
- if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
- || (cntrl_flags & UPL_FOR_PAGEOUT)) {
- /* deny access to the target page */
- /* while it is being worked on */
- if((!dst_page->pageout) &&
- (dst_page->wire_count == 0)) {
+ if ( (cntrl_flags & UPL_ENCRYPT) ) {
+ /*
+ * ENCRYPTED SWAP:
+ * We want to deny access to the target page
+ * because its contents are about to be
+ * encrypted and the user would be very
+ * confused to see encrypted data instead
+ * of their data.
+ */
+ dst_page->busy = TRUE;
+ }
+ if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
+ /*
+ * deny access to the target page
+ * while it is being worked on
+ */
+ if ((!dst_page->pageout) &&
+ (dst_page->wire_count == 0)) {
dst_page->busy = TRUE;
dst_page->pageout = TRUE;
vm_page_wire(dst_page);
}
}
+
+ if (dst_page->phys_page > upl->highest_page)
+ upl->highest_page = dst_page->phys_page;
+
if(user_page_list) {
user_page_list[entry].phys_addr
= dst_page->phys_page;
dst_page->absent;
user_page_list[entry].precious =
dst_page->precious;
- user_page_list[entry].device =
- FALSE;
}
-
vm_page_unlock_queues();
+
+ /*
+ * ENCRYPTED SWAP:
+ * The caller is gathering this page and might
+ * access its contents later on. Decrypt the
+ * page before adding it to the UPL, so that
+ * the caller never sees encrypted data.
+ */
+ if (! (cntrl_flags & UPL_ENCRYPT) &&
+ dst_page->encrypted) {
+ assert(dst_page->busy);
+
+ vm_page_decrypt(dst_page, 0);
+ vm_page_decrypt_for_upl_counter++;
+
+ /*
+ * Retry this page, since anything
+ * could have changed while we were
+ * decrypting.
+ */
+ continue;
+ }
}
entry++;
dst_offset += PAGE_SIZE_64;
VM_PAGE_GRAB_FICTITIOUS(alias_page);
vm_object_lock(object);
}
+
+ if ((cntrl_flags & UPL_WILL_MODIFY) &&
+ object->copy != last_copy_object) {
+ /* Honor copy-on-write obligations */
+
+ /*
+ * The copy object has changed since we
+ * last synchronized for copy-on-write.
+ * Another copy object might have been
+ * inserted while we released the object's
+ * lock. Since someone could have seen the
+ * original contents of the remaining pages
+ * through that new object, we have to
+ * synchronize with it again for the remaining
+ * pages only. The previous pages are "busy"
+ * so they can not be seen through the new
+ * mapping. The new mapping will see our
+ * upcoming changes for those previous pages,
+ * but that's OK since they couldn't see what
+ * was there before. It's just a race anyway
+ * and there's no guarantee of consistency or
+ * atomicity. We just don't want new mappings
+ * to see both the *before* and *after* pages.
+ */
+ if (object->copy != VM_OBJECT_NULL) {
+ vm_object_update(
+ object,
+ dst_offset,/* current offset */
+ xfer_size, /* remaining size */
+ NULL,
+ NULL,
+ FALSE, /* should_return */
+ MEMORY_OBJECT_COPY_SYNC,
+ VM_PROT_NO_CHANGE);
+ upl_cow_again++;
+ upl_cow_again_pages +=
+ xfer_size >> PAGE_SHIFT;
+ }
+ /* remember the copy object we synced with */
+ last_copy_object = object->copy;
+ }
+
dst_page = vm_page_lookup(object, dst_offset);
+
if(dst_page != VM_PAGE_NULL) {
if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
!((dst_page->list_req_pending)
/* already present. */
if(user_page_list) {
user_page_list[entry].phys_addr = 0;
- user_page_list[entry].device = FALSE;
}
entry++;
dst_offset += PAGE_SIZE_64;
/* dump the fictitious page */
dst_page->list_req_pending = FALSE;
dst_page->clustered = FALSE;
+
vm_page_lock_queues();
vm_page_free(dst_page);
vm_page_unlock_queues();
+
+ dst_page = NULL;
} else if ((dst_page->absent &&
dst_page->list_req_pending)) {
/* the default_pager case */
dst_page->list_req_pending = FALSE;
dst_page->busy = FALSE;
- dst_page->clustered = FALSE;
}
}
- if((dst_page = vm_page_lookup(object, dst_offset)) ==
- VM_PAGE_NULL) {
+ if(dst_page == VM_PAGE_NULL) {
if(object->private) {
/*
* This is a nasty wrinkle for users
*/
if(user_page_list) {
user_page_list[entry].phys_addr = 0;
- user_page_list[entry].device = FALSE;
}
entry++;
dst_offset += PAGE_SIZE_64;
dst_page->unlock_request = 0;
}
#endif
+ if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
+ /*
+ * if UPL_RET_ONLY_ABSENT was specified,
+				 * then we're definitely setting up a
+				 * UPL for a clustered read/pagein
+ * operation... mark the pages as clustered
+ * so vm_fault can correctly attribute them
+ * to the 'pagein' bucket the first time
+ * a fault happens on them
+ */
+ dst_page->clustered = TRUE;
+ }
dst_page->absent = TRUE;
object->absent_count++;
}
dst_page->unlock_request = 0;
}
#endif /* 1 */
+
+ /*
+ * ENCRYPTED SWAP:
+ */
+ if (cntrl_flags & UPL_ENCRYPT) {
+ /*
+ * The page is going to be encrypted when we
+ * get it from the pager, so mark it so.
+ */
+ dst_page->encrypted = TRUE;
+ } else {
+ /*
+ * Otherwise, the page will not contain
+ * encrypted data.
+ */
+ dst_page->encrypted = FALSE;
+ }
+
dst_page->overwriting = TRUE;
if(dst_page->fictitious) {
panic("need corner case for fictitious page");
PAGE_SLEEP(object, dst_page, THREAD_UNINT);
continue;
}
-
vm_page_lock_queues();
- if( !(cntrl_flags & UPL_FILE_IO)) {
- pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
- }
- dirty = pmap_is_modified(dst_page->phys_page);
- dirty = dirty ? TRUE : dst_page->dirty;
+
+ if( !(cntrl_flags & UPL_FILE_IO))
+ hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
+ else
+ hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
+ dirty = hw_dirty ? TRUE : dst_page->dirty;
if(cntrl_flags & UPL_SET_LITE) {
int pg_num;
pg_num = (dst_offset-offset)/PAGE_SIZE;
lite_list[pg_num>>5] |=
1 << (pg_num & 31);
- pmap_clear_modify(dst_page->phys_page);
+ if (hw_dirty)
+ pmap_clear_modify(dst_page->phys_page);
/*
* Record that this page has been
* written out
/* use pageclean setup, it is more */
/* convenient even for the pageout */
/* cases here */
+ vm_object_lock(upl->map_object);
vm_pageclean_setup(dst_page,
alias_page, upl->map_object,
size - xfer_size);
+ vm_object_unlock(upl->map_object);
alias_page->absent = FALSE;
alias_page = NULL;
} else {
vm_page_wire(dst_page);
}
- /* expect the page to be used */
- dst_page->reference = TRUE;
+ if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
+ /*
+ * expect the page not to be used
+ * since it's coming in as part
+ * of a cluster and could be
+ * speculative... pages that
+ * are 'consumed' will get a
+ * hardware reference
+ */
+ dst_page->reference = FALSE;
+ } else {
+ /*
+ * expect the page to be used
+ */
+ dst_page->reference = TRUE;
+ }
dst_page->precious =
(cntrl_flags & UPL_PRECIOUS)
? TRUE : FALSE;
+
+ if (dst_page->phys_page > upl->highest_page)
+ upl->highest_page = dst_page->phys_page;
+
if(user_page_list) {
user_page_list[entry].phys_addr
= dst_page->phys_page;
dst_page->absent;
user_page_list[entry].precious =
dst_page->precious;
- user_page_list[entry].device =
- FALSE;
}
vm_page_unlock_queues();
}
-
entry++;
dst_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
THREAD_UNINT);
if (wait_result != THREAD_AWAKENED) {
vm_object_unlock(object);
- return(KERN_FAILURE);
+ return KERN_FAILURE;
}
continue;
}
vm_object_unlock(object);
-
- if (rc = memory_object_data_unlock(
+ rc = memory_object_data_unlock(
object->pager,
dst_offset + object->paging_offset,
size,
- access_required)) {
- if (rc == MACH_SEND_INTERRUPTED)
- continue;
- else
- return KERN_FAILURE;
- }
- break;
-
+ access_required);
+ if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
+ return KERN_FAILURE;
+ vm_object_lock(object);
+
+ if (rc == KERN_SUCCESS)
+ break;
}
+
/* lets wait on the last page requested */
/* NOTE: we will have to update lock completed routine to signal */
if(dst_page != VM_PAGE_NULL &&
(access_required & dst_page->page_lock) != access_required) {
PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
- thread_block((void (*)(void))0);
- vm_object_lock(object);
+ vm_object_unlock(object);
+ thread_block(THREAD_CONTINUE_NULL);
+ return KERN_SUCCESS;
}
}
+
vm_object_unlock(object);
return KERN_SUCCESS;
}
/* JMM - Backward compatability for now */
kern_return_t
+vm_fault_list_request( /* forward */
+ memory_object_control_t control,
+ vm_object_offset_t offset,
+ upl_size_t size,
+ upl_t *upl_ptr,
+ upl_page_info_t **user_page_list_ptr,
+ int page_list_count,
+ int cntrl_flags);
+kern_return_t
vm_fault_list_request(
memory_object_control_t control,
vm_object_offset_t offset,
- vm_size_t size,
+ upl_size_t size,
upl_t *upl_ptr,
upl_page_info_t **user_page_list_ptr,
int page_list_count,
int cntrl_flags)
{
- int local_list_count;
+ unsigned int local_list_count;
upl_page_info_t *user_page_list;
kern_return_t kr;
vm_object_super_upl_request(
vm_object_t object,
vm_object_offset_t offset,
- vm_size_t size,
- vm_size_t super_cluster,
+ upl_size_t size,
+ upl_size_t super_cluster,
upl_t *upl,
upl_page_info_t *user_page_list,
unsigned int *page_list_count,
vm_page_t target_page;
int ticket;
+
if(object->paging_offset > offset)
return KERN_FAILURE;
+ assert(object->paging_in_progress);
offset = offset - object->paging_offset;
+
if(cntrl_flags & UPL_FOR_PAGEOUT) {
+
+ vm_object_lock(object);
+
if((target_page = vm_page_lookup(object, offset))
!= VM_PAGE_NULL) {
ticket = target_page->page_ticket;
((ticket << UPL_PAGE_TICKET_SHIFT)
& UPL_PAGE_TICKET_MASK);
}
+ vm_object_unlock(object);
}
-
-/* turns off super cluster exercised by the default_pager */
-/*
-super_cluster = size;
-*/
- if ((super_cluster > size) &&
- (vm_page_free_count > vm_page_free_reserved)) {
+ if (super_cluster > size) {
vm_object_offset_t base_offset;
- vm_size_t super_size;
+ upl_size_t super_size;
base_offset = (offset &
~((vm_object_offset_t) super_cluster - 1));
super_size = ((base_offset + super_size) > object->size) ?
(object->size - base_offset) : super_size;
if(offset > (base_offset + super_size))
- panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
- /* apparently there is a case where the vm requests a */
- /* page to be written out who's offset is beyond the */
- /* object size */
+ panic("vm_object_super_upl_request: Missed target pageout"
+ " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
+ offset, base_offset, super_size, super_cluster,
+ size, object->paging_offset);
+ /*
+ * apparently there is a case where the vm requests a
+	 * page to be written out whose offset is beyond the
+ * object size
+ */
if((offset + size) > (base_offset + super_size))
super_size = (offset + size) - base_offset;
offset = base_offset;
size = super_size;
}
- vm_object_upl_request(object, offset, size,
- upl, user_page_list, page_list_count,
- cntrl_flags);
+ return vm_object_upl_request(object, offset, size,
+ upl, user_page_list, page_list_count,
+ cntrl_flags);
}
+
+kern_return_t
+vm_map_create_upl(
+ vm_map_t map,
+ vm_map_address_t offset,
+ upl_size_t *upl_size,
+ upl_t *upl,
+ upl_page_info_array_t page_list,
+ unsigned int *count,
+ int *flags)
+{
+ vm_map_entry_t entry;
+ int caller_flags;
+ int force_data_sync;
+ int sync_cow_data;
+ vm_object_t local_object;
+ vm_map_offset_t local_offset;
+ vm_map_offset_t local_start;
+ kern_return_t ret;
+
+ caller_flags = *flags;
+
+ if (caller_flags & ~UPL_VALID_FLAGS) {
+ /*
+ * For forward compatibility's sake,
+ * reject any unknown flag.
+ */
+ return KERN_INVALID_VALUE;
+ }
+
+ force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
+ sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
+
+ if(upl == NULL)
+ return KERN_INVALID_ARGUMENT;
+
+
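+	/*
+	 * The map lock gets dropped below whenever we have to fault on a
+	 * needs-copy entry, sync a shadowed or copied object, or force a
+	 * data sync out to the pager; after each of those the entry
+	 * lookup restarts here.
+	 */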
+REDISCOVER_ENTRY:
+ vm_map_lock(map);
+ if (vm_map_lookup_entry(map, offset, &entry)) {
+ if (entry->object.vm_object == VM_OBJECT_NULL ||
+ !entry->object.vm_object->phys_contiguous) {
+ if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
+ *upl_size = MAX_UPL_TRANSFER * page_size;
+ }
+ }
+ if((entry->vme_end - offset) < *upl_size) {
+ *upl_size = entry->vme_end - offset;
+ }
+ if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ *flags = 0;
+ } else if (entry->object.vm_object->private) {
+ *flags = UPL_DEV_MEMORY;
+ if (entry->object.vm_object->phys_contiguous) {
+ *flags |= UPL_PHYS_CONTIG;
+ }
+ } else {
+ *flags = 0;
+ }
+ vm_map_unlock(map);
+ return KERN_SUCCESS;
+ }
+ /*
+ * Create an object if necessary.
+ */
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ entry->object.vm_object = vm_object_allocate(
+ (vm_size_t)(entry->vme_end - entry->vme_start));
+ entry->offset = 0;
+ }
+ if (!(caller_flags & UPL_COPYOUT_FROM)) {
+ if (!(entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(map);
+ return KERN_PROTECTION_FAILURE;
+ }
+ if (entry->needs_copy) {
+ vm_map_t local_map;
+ vm_object_t object;
+ vm_map_offset_t offset_hi;
+ vm_map_offset_t offset_lo;
+ vm_object_offset_t new_offset;
+ vm_prot_t prot;
+ boolean_t wired;
+ vm_behavior_t behavior;
+ vm_map_version_t version;
+ vm_map_t real_map;
+
+ local_map = map;
+ vm_map_lock_write_to_read(map);
+ if(vm_map_lookup_locked(&local_map,
+ offset, VM_PROT_WRITE,
+ &version, &object,
+ &new_offset, &prot, &wired,
+ &behavior, &offset_lo,
+ &offset_hi, &real_map)) {
+ vm_map_unlock(local_map);
+ return KERN_FAILURE;
+ }
+ if (real_map != map) {
+ vm_map_unlock(real_map);
+ }
+ vm_object_unlock(object);
+ vm_map_unlock(local_map);
+
+ goto REDISCOVER_ENTRY;
+ }
+ }
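+		/*
+		 * For a submap, recurse on the submap with the offset
+		 * rebased to it, holding a map reference so the submap
+		 * can't disappear once this map's lock is dropped.
+		 */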
+ if (entry->is_sub_map) {
+ vm_map_t submap;
+
+ submap = entry->object.sub_map;
+ local_start = entry->vme_start;
+ local_offset = entry->offset;
+ vm_map_reference(submap);
+ vm_map_unlock(map);
+
+ ret = (vm_map_create_upl(submap,
+ local_offset + (offset - local_start),
+ upl_size, upl, page_list, count,
+ flags));
+
+ vm_map_deallocate(submap);
+ return ret;
+ }
+
+ if (sync_cow_data) {
+ if (entry->object.vm_object->shadow
+ || entry->object.vm_object->copy) {
+
+ local_object = entry->object.vm_object;
+ local_start = entry->vme_start;
+ local_offset = entry->offset;
+ vm_object_reference(local_object);
+ vm_map_unlock(map);
+
+ if (entry->object.vm_object->shadow &&
+ entry->object.vm_object->copy) {
+ vm_object_lock_request(
+ local_object->shadow,
+ (vm_object_offset_t)
+ ((offset - local_start) +
+ local_offset) +
+ local_object->shadow_offset,
+ *upl_size, FALSE,
+ MEMORY_OBJECT_DATA_SYNC,
+ VM_PROT_NO_CHANGE);
+ }
+ sync_cow_data = FALSE;
+ vm_object_deallocate(local_object);
+ goto REDISCOVER_ENTRY;
+ }
+ }
+
+ if (force_data_sync) {
+
+ local_object = entry->object.vm_object;
+ local_start = entry->vme_start;
+ local_offset = entry->offset;
+ vm_object_reference(local_object);
+ vm_map_unlock(map);
+
+ vm_object_lock_request(
+ local_object,
+ (vm_object_offset_t)
+ ((offset - local_start) + local_offset),
+ (vm_object_size_t)*upl_size, FALSE,
+ MEMORY_OBJECT_DATA_SYNC,
+ VM_PROT_NO_CHANGE);
+ force_data_sync = FALSE;
+ vm_object_deallocate(local_object);
+ goto REDISCOVER_ENTRY;
+ }
+
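+		/*
+		 * The entry now points at a usable object: clamp the
+		 * request size for non-private objects, report device /
+		 * physically-contiguous memory through *flags, take an
+		 * object reference, drop the map lock and hand the request
+		 * to the object layer (vm_object_iopl_request for wired I/O
+		 * requests, vm_object_upl_request otherwise).
+		 */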
+ if(!(entry->object.vm_object->private)) {
+ if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
+ *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
+ if(entry->object.vm_object->phys_contiguous) {
+ *flags = UPL_PHYS_CONTIG;
+ } else {
+ *flags = 0;
+ }
+ } else {
+ *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
+ }
+ local_object = entry->object.vm_object;
+ local_offset = entry->offset;
+ local_start = entry->vme_start;
+ vm_object_reference(local_object);
+ vm_map_unlock(map);
+ if(caller_flags & UPL_SET_IO_WIRE) {
+ ret = (vm_object_iopl_request(local_object,
+ (vm_object_offset_t)
+ ((offset - local_start)
+ + local_offset),
+ *upl_size,
+ upl,
+ page_list,
+ count,
+ caller_flags));
+ } else {
+ ret = (vm_object_upl_request(local_object,
+ (vm_object_offset_t)
+ ((offset - local_start)
+ + local_offset),
+ *upl_size,
+ upl,
+ page_list,
+ count,
+ caller_flags));
+ }
+ vm_object_deallocate(local_object);
+ return(ret);
+ }
+
+ vm_map_unlock(map);
+ return(KERN_FAILURE);
+
+}
+/*
+ * Internal routine to enter a UPL into a VM map.
+ *
+ * JMM - This should just be doable through the standard
+ * vm_map_enter() API.
+ */
kern_return_t
-vm_upl_map(
- vm_map_t map,
- upl_t upl,
- vm_offset_t *dst_addr)
+vm_map_enter_upl(
+ vm_map_t map,
+ upl_t upl,
+ vm_map_offset_t *dst_addr)
{
- vm_size_t size;
+ vm_map_size_t size;
vm_object_offset_t offset;
- vm_offset_t addr;
+ vm_map_offset_t addr;
vm_page_t m;
kern_return_t kr;
if(upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
- ((((vm_offset_t)upl) + sizeof(struct upl))
+ ((((uintptr_t)upl) + sizeof(struct upl))
+ ((upl->size/PAGE_SIZE)
* sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
}
object = upl->map_object;
upl->map_object = vm_object_allocate(upl->size);
upl->map_object->shadow_offset =
upl->offset - object->paging_offset;
upl->map_object->wimg_bits = object->wimg_bits;
- vm_object_unlock(upl->map_object);
offset = upl->map_object->shadow_offset;
new_offset = 0;
size = upl->size;
+
vm_object_lock(object);
+
while(size) {
pg_num = (new_offset)/PAGE_SIZE;
if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
alias_page->private = TRUE;
alias_page->pageout = TRUE;
alias_page->phys_page = m->phys_page;
+
+ vm_page_lock_queues();
vm_page_wire(alias_page);
+ vm_page_unlock_queues();
+
+ /*
+ * ENCRYPTED SWAP:
+ * The virtual page ("m") has to be wired in some way
+ * here or its physical page ("m->phys_page") could
+ * be recycled at any time.
+ * Assuming this is enforced by the caller, we can't
+ * get an encrypted page here. Since the encryption
+ * key depends on the VM page's "pager" object and
+ * the "paging_offset", we couldn't handle 2 pageable
+ * VM pages (with different pagers and paging_offsets)
+ * sharing the same physical page: we could end up
+ * encrypting with one key (via one VM page) and
+ * decrypting with another key (via the alias VM page).
+ */
+ ASSERT_PAGE_DECRYPTED(m);
vm_page_insert(alias_page,
upl->map_object, new_offset);
new_offset += PAGE_SIZE_64;
}
vm_object_unlock(object);
+ vm_object_unlock(upl->map_object);
}
+ if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
+ offset = upl->offset - upl->map_object->paging_offset;
+ else
+ offset = 0;
- offset = 0; /* Always map the entire object */
size = upl->size;
vm_object_lock(upl->map_object);
/* NEED A UPL_MAP ALIAS */
- kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
- upl->map_object, offset, FALSE,
+ kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
+ VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS) {
return(kr);
}
+ vm_object_lock(upl->map_object);
+
for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
m = vm_page_lookup(upl->map_object, offset);
if(m) {
}
offset+=PAGE_SIZE_64;
}
+ vm_object_unlock(upl->map_object);
+
upl->ref_count++; /* hold a reference for the mapping */
upl->flags |= UPL_PAGE_LIST_MAPPED;
upl->kaddr = *dst_addr;
return KERN_SUCCESS;
}
-
+/*
+ * Internal routine to remove a UPL mapping from a VM map.
+ *
+ * XXX - This should just be doable through a standard
+ * vm_map_remove() operation. Otherwise, implicit clean-up
+ * of the target map won't be able to correctly remove
+ * these (and release the reference on the UPL). Having
+ * to do this means we can't map these into user-space
+ * maps yet.
+ */
kern_return_t
-vm_upl_unmap(
+vm_map_remove_upl(
vm_map_t map,
upl_t upl)
{
vm_address_t addr;
- vm_size_t size;
+ upl_size_t size;
if (upl == UPL_NULL)
return KERN_INVALID_ARGUMENT;
upl->kaddr = (vm_offset_t) 0;
upl_unlock(upl);
- vm_deallocate(map, addr, size);
+ vm_map_remove( map,
+ vm_map_trunc_page(addr),
+ vm_map_round_page(addr + size),
+ VM_MAP_NO_FLAGS);
return KERN_SUCCESS;
}
upl_unlock(upl);
kern_return_t
upl_commit_range(
upl_t upl,
- vm_offset_t offset,
- vm_size_t size,
+ upl_offset_t offset,
+ upl_size_t size,
int flags,
upl_page_info_t *page_list,
mach_msg_type_number_t count,
boolean_t *empty)
{
- vm_size_t xfer_size = size;
+ upl_size_t xfer_size = size;
vm_object_t shadow_object;
vm_object_t object = upl->map_object;
vm_object_offset_t target_offset;
int entry;
wpl_array_t lite_list;
int occupied;
+ int delayed_unlock = 0;
+ int clear_refmod = 0;
+ boolean_t shadow_internal;
*empty = FALSE;
if (upl == UPL_NULL)
return KERN_INVALID_ARGUMENT;
+
if (count == 0)
page_list = NULL;
- if(object->pageout) {
+ if (object->pageout) {
shadow_object = object->shadow;
} else {
shadow_object = object;
upl_lock(upl);
+ if (upl->flags & UPL_ACCESS_BLOCKED) {
+ /*
+ * We used this UPL to block access to the pages by marking
+ * them "busy". Now we need to clear the "busy" bit to allow
+ * access to these pages again.
+ */
+ flags |= UPL_COMMIT_ALLOW_ACCESS;
+ }
+
+ if (upl->flags & UPL_CLEAR_DIRTY)
+ flags |= UPL_COMMIT_CLEAR_DIRTY;
- if(upl->flags & UPL_DEVICE_MEMORY) {
+ if (upl->flags & UPL_DEVICE_MEMORY) {
xfer_size = 0;
} else if ((offset + size) > upl->size) {
upl_unlock(upl);
return KERN_FAILURE;
}
- if(upl->flags & UPL_INTERNAL) {
+ if (upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
- ((((vm_offset_t)upl) + sizeof(struct upl))
+ ((((uintptr_t)upl) + sizeof(struct upl))
+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
}
-
+ if (object != shadow_object)
+ vm_object_lock(object);
vm_object_lock(shadow_object);
+ shadow_internal = shadow_object->internal;
+
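+	/*
+	 * While walking the pages below, acquisitions of the page queues
+	 * lock are batched: delayed_unlock counts pages processed under
+	 * the lock, and the lock is dropped (and re-taken on the next
+	 * page) once the count passes DELAYED_UNLOCK_LIMIT so the hold
+	 * time stays bounded.
+	 */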
entry = offset/PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
- while(xfer_size) {
+
+ while (xfer_size) {
vm_page_t t,m;
upl_page_info_t *p;
m = VM_PAGE_NULL;
- if(upl->flags & UPL_LITE) {
- int pg_num;
+
+ if (upl->flags & UPL_LITE) {
+ int pg_num;
+
pg_num = target_offset/PAGE_SIZE;
- if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
- lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
+
+ if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
+ lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
m = vm_page_lookup(shadow_object,
- target_offset + (upl->offset -
- shadow_object->paging_offset));
+ target_offset + (upl->offset -
+ shadow_object->paging_offset));
}
}
- if(object->pageout) {
- if ((t = vm_page_lookup(object, target_offset))
- != NULL) {
+ if (object->pageout) {
+ if ((t = vm_page_lookup(object, target_offset)) != NULL) {
t->pageout = FALSE;
+
+ if (delayed_unlock) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
VM_PAGE_FREE(t);
- if(m == NULL) {
+
+ if (m == NULL) {
m = vm_page_lookup(
shadow_object,
target_offset +
object->shadow_offset);
}
- if(m != VM_PAGE_NULL)
+ if (m != VM_PAGE_NULL)
vm_object_paging_end(m->object);
}
}
+ if (m != VM_PAGE_NULL) {
+
+ clear_refmod = 0;
+
+ if (upl->flags & UPL_IO_WIRE) {
+
+ if (delayed_unlock == 0)
+ vm_page_lock_queues();
- if(m != VM_PAGE_NULL) {
- if(upl->flags & UPL_IO_WIRE) {
- vm_page_lock_queues();
vm_page_unwire(m);
- vm_page_unlock_queues();
- if(page_list) {
+
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+ if (page_list) {
page_list[entry].phys_addr = 0;
}
if (flags & UPL_COMMIT_SET_DIRTY) {
- m->dirty = TRUE;
- } else if ((upl->flags & UPL_CLEAR_DIRTY) ||
- (flags & UPL_COMMIT_CLEAR_DIRTY)) {
- pmap_clear_modify(m->phys_page);
+ m->dirty = TRUE;
+ } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
m->dirty = FALSE;
+ clear_refmod |= VM_MEM_MODIFIED;
}
if (flags & UPL_COMMIT_INACTIVATE) {
- vm_page_deactivate(m);
m->reference = FALSE;
- pmap_clear_reference(m->phys_page);
+ clear_refmod |= VM_MEM_REFERENCED;
+ vm_page_deactivate(m);
+ }
+ if (clear_refmod)
+ pmap_clear_refmod(m->phys_page, clear_refmod);
+
+ if (flags & UPL_COMMIT_ALLOW_ACCESS) {
+ /*
+ * We blocked access to the pages in this UPL.
+ * Clear the "busy" bit and wake up any waiter
+ * for this page.
+ */
+ PAGE_WAKEUP_DONE(m);
}
+
target_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
entry++;
continue;
}
- vm_page_lock_queues();
- if ((upl->flags & UPL_CLEAR_DIRTY) ||
- (flags & UPL_COMMIT_CLEAR_DIRTY)) {
- pmap_clear_modify(m->phys_page);
+ if (delayed_unlock == 0)
+ vm_page_lock_queues();
+ /*
+ * make sure to clear the hardware
+ * modify or reference bits before
+			 * releasing the BUSY bit on this page;
+			 * otherwise we risk losing a legitimate
+ * change of state
+ */
+ if (flags & UPL_COMMIT_CLEAR_DIRTY) {
m->dirty = FALSE;
+ clear_refmod |= VM_MEM_MODIFIED;
}
- if(page_list) {
+ if (flags & UPL_COMMIT_INACTIVATE)
+ clear_refmod |= VM_MEM_REFERENCED;
+
+ if (clear_refmod)
+ pmap_clear_refmod(m->phys_page, clear_refmod);
+
+ if (page_list) {
p = &(page_list[entry]);
if(p->phys_addr && p->pageout && !m->pageout) {
m->busy = TRUE;
}
m->dump_cleaning = FALSE;
if(m->laundry) {
- vm_page_laundry_count--;
- m->laundry = FALSE;
- if (vm_page_laundry_count < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t)
- &vm_page_laundry_count);
- }
+ vm_pageout_throttle_up(m);
}
if(m->pageout) {
m->cleaning = FALSE;
#if MACH_CLUSTER_STATS
if (m->wanted) vm_pageout_target_collisions++;
#endif
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
- m->dirty = pmap_is_modified(m->phys_page);
+ if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
+ m->dirty = TRUE;
+ else
+ m->dirty = FALSE;
+
if(m->dirty) {
- CLUSTER_STAT(
- vm_pageout_target_page_dirtied++;)
vm_page_unwire(m);/* reactivates */
- VM_STAT(reactivations++);
+
+ if (upl->flags & UPL_PAGEOUT) {
+ CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
+ VM_STAT(reactivations++);
+ }
PAGE_WAKEUP_DONE(m);
} else {
- CLUSTER_STAT(
- vm_pageout_target_page_freed++;)
vm_page_free(m);/* clears busy, etc. */
- VM_STAT(pageouts++);
+
+ if (upl->flags & UPL_PAGEOUT) {
+ CLUSTER_STAT(vm_pageout_target_page_freed++;)
+
+ if (page_list[entry].dirty)
+ VM_STAT(pageouts++);
+ }
}
- vm_page_unlock_queues();
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
target_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
entry++;
continue;
}
- if (flags & UPL_COMMIT_INACTIVATE) {
- vm_page_deactivate(m);
- m->reference = FALSE;
- pmap_clear_reference(m->phys_page);
- } else if (!m->active && !m->inactive) {
- if (m->reference)
- vm_page_activate(m);
- else
- vm_page_deactivate(m);
- }
#if MACH_CLUSTER_STATS
m->dirty = pmap_is_modified(m->phys_page);
m->dirty = FALSE;
} else if (m->overwriting) {
/* alternate request page list, write to
- /* page_list case. Occurs when the original
- /* page was wired at the time of the list
- /* request */
+ * page_list case. Occurs when the original
+ * page was wired at the time of the list
+ * request */
assert(m->wire_count != 0);
vm_page_unwire(m);/* reactivates */
m->overwriting = FALSE;
}
m->cleaning = FALSE;
+
/* It is a part of the semantic of COPYOUT_FROM */
/* UPLs that a commit implies cache sync */
/* between the vm page and the backing store */
if (upl->flags & UPL_PAGE_SYNC_DONE)
m->precious = FALSE;
- if (flags & UPL_COMMIT_SET_DIRTY) {
- m->dirty = TRUE;
- }
- /*
- * Wakeup any thread waiting for the page to be un-cleaning.
- */
- PAGE_WAKEUP(m);
- vm_page_unlock_queues();
+ if (flags & UPL_COMMIT_SET_DIRTY)
+ m->dirty = TRUE;
- }
- target_offset += PAGE_SIZE_64;
- xfer_size -= PAGE_SIZE;
- entry++;
- }
+ if (flags & UPL_COMMIT_INACTIVATE) {
+ m->reference = FALSE;
+ vm_page_deactivate(m);
+ } else if (!m->active && !m->inactive) {
+ if (m->reference)
+ vm_page_activate(m);
+ else
+ vm_page_deactivate(m);
+ }
+
+ if (flags & UPL_COMMIT_ALLOW_ACCESS) {
+ /*
+			 * We blocked access to the pages in this UPL.
+ * Clear the "busy" bit on this page before we
+ * wake up any waiter.
+ */
+ m->busy = FALSE;
+ }
+
+ /*
+ * Wakeup any thread waiting for the page to be un-cleaning.
+ */
+ PAGE_WAKEUP(m);
+
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+ }
+ target_offset += PAGE_SIZE_64;
+ xfer_size -= PAGE_SIZE;
+ entry++;
+ }
+ if (delayed_unlock)
+ vm_page_unlock_queues();
occupied = 1;
+
if (upl->flags & UPL_DEVICE_MEMORY) {
occupied = 0;
} else if (upl->flags & UPL_LITE) {
vm_object_paging_end(shadow_object);
}
vm_object_unlock(shadow_object);
+ if (object != shadow_object)
+ vm_object_unlock(object);
upl_unlock(upl);
return KERN_SUCCESS;
kern_return_t
upl_abort_range(
upl_t upl,
- vm_offset_t offset,
- vm_size_t size,
+ upl_offset_t offset,
+ upl_size_t size,
int error,
boolean_t *empty)
{
- vm_size_t xfer_size = size;
+ upl_size_t xfer_size = size;
vm_object_t shadow_object;
vm_object_t object = upl->map_object;
vm_object_offset_t target_offset;
- vm_object_offset_t page_offset;
int entry;
wpl_array_t lite_list;
int occupied;
+ boolean_t shadow_internal;
*empty = FALSE;
upl_unlock(upl);
return KERN_FAILURE;
}
-
+ if (object != shadow_object)
+ vm_object_lock(object);
vm_object_lock(shadow_object);
+ shadow_internal = shadow_object->internal;
+
if(upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
- ((((vm_offset_t)upl) + sizeof(struct upl))
+ ((((uintptr_t)upl) + sizeof(struct upl))
+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
}
entry = offset/PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
while(xfer_size) {
vm_page_t t,m;
- upl_page_info_t *p;
m = VM_PAGE_NULL;
if(upl->flags & UPL_LITE) {
if(m != VM_PAGE_NULL) {
vm_page_lock_queues();
if(m->absent) {
+ boolean_t must_free = TRUE;
+
/* COPYOUT = FALSE case */
/* check for error conditions which must */
/* be passed back to the pages customer */
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
+ must_free = FALSE;
} else if(error & UPL_ABORT_UNAVAILABLE) {
m->restart = FALSE;
m->unusual = TRUE;
- m->clustered = FALSE;
+ must_free = FALSE;
} else if(error & UPL_ABORT_ERROR) {
m->restart = FALSE;
m->absent = FALSE;
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
- } else if(error & UPL_ABORT_DUMP_PAGES) {
- m->clustered = TRUE;
- } else {
- m->clustered = TRUE;
+ must_free = FALSE;
}
-
+
+ /*
+ * ENCRYPTED SWAP:
+ * If the page was already encrypted,
+ * we don't really need to decrypt it
+ * now. It will get decrypted later,
+ * on demand, as soon as someone needs
+ * to access its contents.
+ */
m->cleaning = FALSE;
m->overwriting = FALSE;
PAGE_WAKEUP_DONE(m);
- if(m->clustered) {
+
+ if (must_free == TRUE) {
vm_page_free(m);
} else {
vm_page_activate(m);
}
-
vm_page_unlock_queues();
+
target_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
entry++;
* Handle the trusted pager throttle.
*/
if (m->laundry) {
- vm_page_laundry_count--;
- m->laundry = FALSE;
- if (vm_page_laundry_count
- < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t)
- &vm_page_laundry_count);
- }
+ vm_pageout_throttle_up(m);
}
if(m->pageout) {
assert(m->busy);
}
m->dump_cleaning = FALSE;
m->cleaning = FALSE;
- m->busy = FALSE;
m->overwriting = FALSE;
#if MACH_PAGEMAP
vm_external_state_clr(
#endif /* MACH_PAGEMAP */
if(error & UPL_ABORT_DUMP_PAGES) {
vm_page_free(m);
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
+ pmap_disconnect(m->phys_page);
} else {
- PAGE_WAKEUP(m);
+ PAGE_WAKEUP_DONE(m);
}
vm_page_unlock_queues();
}
vm_object_paging_end(shadow_object);
}
vm_object_unlock(shadow_object);
+ if (object != shadow_object)
+ vm_object_unlock(object);
+
upl_unlock(upl);
+
return KERN_SUCCESS;
}
vm_object_offset_t offset;
vm_object_offset_t shadow_offset;
vm_object_offset_t target_offset;
- int i;
+ upl_size_t i;
wpl_array_t lite_list;
vm_page_t t,m;
int occupied;
+ boolean_t shadow_internal;
if (upl == UPL_NULL)
return KERN_INVALID_ARGUMENT;
if(upl->flags & UPL_INTERNAL) {
lite_list = (wpl_array_t)
- ((((vm_offset_t)upl) + sizeof(struct upl))
+ ((((uintptr_t)upl) + sizeof(struct upl))
+ ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
} else {
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
}
offset = 0;
+
+ if (object != shadow_object)
+ vm_object_lock(object);
vm_object_lock(shadow_object);
+
+ shadow_internal = shadow_object->internal;
+
for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
m = VM_PAGE_NULL;
target_offset = offset + shadow_offset;
if(m != VM_PAGE_NULL) {
vm_page_lock_queues();
if(m->absent) {
+ boolean_t must_free = TRUE;
+
/* COPYOUT = FALSE case */
/* check for error conditions which must */
/* be passed back to the pages customer */
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
+ must_free = FALSE;
} else if(error & UPL_ABORT_UNAVAILABLE) {
m->restart = FALSE;
m->unusual = TRUE;
- m->clustered = FALSE;
+ must_free = FALSE;
} else if(error & UPL_ABORT_ERROR) {
m->restart = FALSE;
m->absent = FALSE;
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
- } else if(error & UPL_ABORT_DUMP_PAGES) {
- m->clustered = TRUE;
- } else {
- m->clustered = TRUE;
+ must_free = FALSE;
}
-
+
+ /*
+ * ENCRYPTED SWAP:
+ * If the page was already encrypted,
+ * we don't really need to decrypt it
+ * now. It will get decrypted later,
+ * on demand, as soon as someone needs
+ * to access its contents.
+ */
+
m->cleaning = FALSE;
m->overwriting = FALSE;
PAGE_WAKEUP_DONE(m);
- if(m->clustered) {
+
+ if (must_free == TRUE) {
vm_page_free(m);
} else {
vm_page_activate(m);
* Handle the trusted pager throttle.
*/
if (m->laundry) {
- vm_page_laundry_count--;
- m->laundry = FALSE;
- if (vm_page_laundry_count
- < vm_page_laundry_min) {
- vm_page_laundry_min = 0;
- thread_wakeup((event_t)
- &vm_page_laundry_count);
- }
+ vm_pageout_throttle_up(m);
}
if(m->pageout) {
assert(m->busy);
}
m->dump_cleaning = FALSE;
m->cleaning = FALSE;
- m->busy = FALSE;
m->overwriting = FALSE;
#if MACH_PAGEMAP
vm_external_state_clr(
#endif /* MACH_PAGEMAP */
if(error & UPL_ABORT_DUMP_PAGES) {
vm_page_free(m);
- pmap_page_protect(m->phys_page, VM_PROT_NONE);
+ pmap_disconnect(m->phys_page);
} else {
- PAGE_WAKEUP(m);
+ PAGE_WAKEUP_DONE(m);
}
vm_page_unlock_queues();
}
occupied = 0;
} else if (upl->flags & UPL_LITE) {
int pg_num;
- int i;
+ int j;
pg_num = upl->size/PAGE_SIZE;
pg_num = (pg_num + 31) >> 5;
occupied = 0;
- for(i= 0; i<pg_num; i++) {
- if(lite_list[i] != 0) {
+ for(j= 0; j<pg_num; j++) {
+ if(lite_list[j] != 0) {
occupied = 1;
break;
}
vm_object_paging_end(shadow_object);
}
vm_object_unlock(shadow_object);
+ if (object != shadow_object)
+ vm_object_unlock(object);
+
upl_unlock(upl);
return KERN_SUCCESS;
}
if (upl->flags & UPL_DEVICE_MEMORY)
page_list = NULL;
+ if (upl->flags & UPL_ENCRYPTED) {
+ /*
+ * ENCRYPTED SWAP:
+ * This UPL was encrypted, but we don't need
+ * to decrypt here. We'll decrypt each page
+ * later, on demand, as soon as someone needs
+ * to access the page's contents.
+ */
+ }
+
if ((upl->flags & UPL_CLEAR_DIRTY) ||
(upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
vm_object_t shadow_object = upl->map_object->shadow;
vm_object_t object = upl->map_object;
vm_object_offset_t target_offset;
- vm_size_t xfer_end;
+ upl_size_t xfer_end;
int entry;
vm_page_t t, m;
upl_page_info_t *p;
+ if (object != shadow_object)
+ vm_object_lock(object);
vm_object_lock(shadow_object);
entry = 0;
m = vm_page_lookup(shadow_object, target_offset);
if(m != VM_PAGE_NULL) {
+ /*
+ * ENCRYPTED SWAP:
+ * If this page was encrypted, we
+ * don't need to decrypt it here.
+ * We'll decrypt it later, on demand,
+ * as soon as someone needs to access
+ * its contents.
+ */
+
if (upl->flags & UPL_CLEAR_DIRTY) {
pmap_clear_modify(m->phys_page);
m->dirty = FALSE;
target_offset += PAGE_SIZE_64;
entry++;
}
-
vm_object_unlock(shadow_object);
+ if (object != shadow_object)
+ vm_object_unlock(object);
+
}
if (upl->flags & UPL_DEVICE_MEMORY) {
vm_object_lock(upl->map_object->shadow);
vm_object_iopl_request(
vm_object_t object,
vm_object_offset_t offset,
- vm_size_t size,
+ upl_size_t size,
upl_t *upl_ptr,
upl_page_info_array_t user_page_list,
unsigned int *page_list_count,
{
vm_page_t dst_page;
vm_object_offset_t dst_offset = offset;
- vm_size_t xfer_size = size;
+ upl_size_t xfer_size = size;
upl_t upl = NULL;
- int entry;
- wpl_array_t lite_list;
+ unsigned int entry;
+ wpl_array_t lite_list = NULL;
int page_field_size;
-
+ int delayed_unlock = 0;
+ int no_zero_fill = FALSE;
vm_page_t alias_page = NULL;
kern_return_t ret;
vm_prot_t prot;
- if(cntrl_flags & UPL_COPYOUT_FROM) {
+ if (cntrl_flags & ~UPL_VALID_FLAGS) {
+ /*
+ * For forward compatibility's sake,
+ * reject any unknown flag.
+ */
+ return KERN_INVALID_VALUE;
+ }
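+	/*
+	 * With no pool of low physical pages to substitute from,
+	 * we can't honor UPL_NEED_32BIT_ADDR, so ignore it.
+	 */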
+ if (vm_lopage_poolsize == 0)
+ cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
+
+ if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
+ if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
+ return KERN_INVALID_VALUE;
+
+ if (object->phys_contiguous) {
+ if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
+ return KERN_INVALID_ADDRESS;
+
+ if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
+ return KERN_INVALID_ADDRESS;
+ }
+ }
+
+ if (cntrl_flags & UPL_ENCRYPT) {
+ /*
+ * ENCRYPTED SWAP:
+ * The paging path doesn't use this interface,
+ * so we don't support the UPL_ENCRYPT flag
+ * here. We won't encrypt the pages.
+ */
+ assert(! (cntrl_flags & UPL_ENCRYPT));
+ }
+
+ if (cntrl_flags & UPL_NOZEROFILL)
+ no_zero_fill = TRUE;
+
+ if (cntrl_flags & UPL_COPYOUT_FROM)
prot = VM_PROT_READ;
- } else {
+ else
prot = VM_PROT_READ | VM_PROT_WRITE;
- }
if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
size = MAX_UPL_TRANSFER * page_size;
return KERN_INVALID_ARGUMENT;
if((!object->internal) && (object->paging_offset != 0))
- panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
+ panic("vm_object_upl_request: external object with non-zero paging offset\n");
if(object->phys_contiguous) {
/* No paging operations are possible against this memory */
UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
size);
user_page_list = (upl_page_info_t *)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
lite_list = (wpl_array_t)
- (((vm_offset_t)user_page_list) +
+ (((uintptr_t)user_page_list) +
((size/PAGE_SIZE) *
sizeof(upl_page_info_t)));
page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
} else {
upl = upl_create(UPL_CREATE_INTERNAL, size);
user_page_list = (upl_page_info_t *)
- (((vm_offset_t)upl)
+ (((uintptr_t)upl)
+ sizeof(struct upl));
upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
}
if(cntrl_flags & UPL_SET_LITE) {
upl = upl_create(UPL_CREATE_LITE, size);
lite_list = (wpl_array_t)
- (((vm_offset_t)upl) + sizeof(struct upl));
+ (((uintptr_t)upl) + sizeof(struct upl));
page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
page_field_size =
(page_field_size + 3) & 0xFFFFFFFC;
}
if(object->phys_contiguous) {
- upl->size = size;
- upl->offset = offset + object->paging_offset;
- *upl_ptr = upl;
- if(user_page_list) {
- user_page_list[0].phys_addr =
- (offset + object->shadow_offset)>>12;
- user_page_list[0].device = TRUE;
- }
upl->map_object = object;
/* don't need any shadow mappings for this one */
/* since it is already I/O memory */
vm_object_paging_begin(object);
vm_object_unlock(object);
+ /* paging in progress also protects the paging_offset */
+ upl->offset = offset + object->paging_offset;
+ upl->size = size;
+ *upl_ptr = upl;
+ if(user_page_list) {
+ user_page_list[0].phys_addr =
+ (offset + object->shadow_offset)>>PAGE_SHIFT;
+ user_page_list[0].device = TRUE;
+ }
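+		/* remember the highest physical page number covered by this UPL */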
+ upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT;
+
if(page_list_count != NULL) {
if (upl->flags & UPL_INTERNAL) {
*page_list_count = 0;
}
return KERN_SUCCESS;
}
-
+ if(user_page_list)
+ user_page_list[0].device = FALSE;
if(cntrl_flags & UPL_SET_LITE) {
upl->map_object = object;
upl->map_object->wimg_bits = object->wimg_bits;
vm_object_unlock(upl->map_object);
}
+ }
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+
+ if (!object->phys_contiguous) {
+ /* Protect user space from future COW operations */
+ object->true_share = TRUE;
+ if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
+ object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+ }
+
+ /* we can lock the upl offset now that paging_in_progress is set */
+ if(upl_ptr) {
upl->size = size;
upl->offset = offset + object->paging_offset;
*upl_ptr = upl;
+#ifdef UPL_DEBUG
+ queue_enter(&object->uplq, upl, upl_t, uplq);
+#endif /* UPL_DEBUG */
+ }
+
+ if (cntrl_flags & UPL_BLOCK_ACCESS) {
+ /*
+		 * The user requested that access to the pages in this UPL
+		 * be blocked until the UPL is committed or aborted.
+ */
+ upl->flags |= UPL_ACCESS_BLOCKED;
+ }
+
+ entry = 0;
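+	/*
+	 * Walk the requested range one page at a time: fault in any page
+	 * that is missing, busy, encrypted or otherwise inaccessible,
+	 * substitute a low page if the caller needs 32-bit addresses,
+	 * then wire the page and record it in the UPL.
+	 */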
+ while (xfer_size) {
+ if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
+ if (delayed_unlock) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+ vm_object_unlock(object);
+ VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ vm_object_lock(object);
+ }
+ dst_page = vm_page_lookup(object, dst_offset);
+
+ /*
+ * ENCRYPTED SWAP:
+ * If the page is encrypted, we need to decrypt it,
+ * so force a soft page fault.
+ */
+ if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
+ (dst_page->encrypted) ||
+ (dst_page->unusual && (dst_page->error ||
+ dst_page->restart ||
+ dst_page->absent ||
+ dst_page->fictitious ||
+ (prot & dst_page->page_lock)))) {
+ vm_fault_return_t result;
+ do {
+ vm_page_t top_page;
+ kern_return_t error_code;
+ int interruptible;
+
+ vm_object_offset_t lo_offset = offset;
+ vm_object_offset_t hi_offset = offset + size;
+
+
+ if (delayed_unlock) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+
+ if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
+ interruptible = THREAD_ABORTSAFE;
+ } else {
+ interruptible = THREAD_UNINT;
+ }
+
+ result = vm_fault_page(object, dst_offset,
+ prot | VM_PROT_WRITE, FALSE,
+ interruptible,
+ lo_offset, hi_offset,
+ VM_BEHAVIOR_SEQUENTIAL,
+ &prot, &dst_page, &top_page,
+ (int *)0,
+ &error_code, no_zero_fill, FALSE, NULL, 0);
+
+ switch(result) {
+ case VM_FAULT_SUCCESS:
+
+ PAGE_WAKEUP_DONE(dst_page);
+
+ /*
+ * Release paging references and
+ * top-level placeholder page, if any.
+ */
+
+ if(top_page != VM_PAGE_NULL) {
+ vm_object_t local_object;
+ local_object =
+ top_page->object;
+ if(top_page->object
+ != dst_page->object) {
+ vm_object_lock(
+ local_object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(
+ local_object);
+ vm_object_unlock(
+ local_object);
+ } else {
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(
+ local_object);
+ }
+ }
+
+ break;
+
+
+ case VM_FAULT_RETRY:
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ break;
+
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ break;
+
+ case VM_FAULT_MEMORY_SHORTAGE:
+ if (vm_page_wait(interruptible)) {
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ break;
+ }
+ /* fall thru */
+
+ case VM_FAULT_INTERRUPTED:
+ error_code = MACH_SEND_INTERRUPTED;
+ case VM_FAULT_MEMORY_ERROR:
+ ret = (error_code ? error_code:
+ KERN_MEMORY_ERROR);
+ vm_object_lock(object);
+
+ goto return_err;
+ }
+ } while ((result != VM_FAULT_SUCCESS)
+ || (result == VM_FAULT_INTERRUPTED));
+ }
+
+ if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
+ dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
+ vm_page_t low_page;
+ int refmod;
+
+			/*
+			 * Support devices that can't DMA above 32 bits by
+			 * substituting pages from a pool of low-address memory
+			 * for any pages we find above the 4G mark. We can't
+			 * substitute if the page is already wired, because we
+			 * don't know whether that physical address has been
+			 * handed out to some other 64-bit capable DMA device.
+			 */
+ if (dst_page->wire_count) {
+ ret = KERN_PROTECTION_FAILURE;
+ goto return_err;
+ }
+ if (delayed_unlock) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+ low_page = vm_page_grablo();
+
+ if (low_page == VM_PAGE_NULL) {
+ ret = KERN_RESOURCE_SHORTAGE;
+ goto return_err;
+ }
+ /*
+ * from here until the vm_page_replace completes
+			 * we mustn't drop the object lock... we don't
+ * want anyone refaulting this page in and using
+ * it after we disconnect it... we want the fault
+ * to find the new page being substituted.
+ */
+ refmod = pmap_disconnect(dst_page->phys_page);
+
+ vm_page_copy(dst_page, low_page);
+
+ low_page->reference = dst_page->reference;
+ low_page->dirty = dst_page->dirty;
+
+ if (refmod & VM_MEM_REFERENCED)
+ low_page->reference = TRUE;
+ if (refmod & VM_MEM_MODIFIED)
+ low_page->dirty = TRUE;
+
+ vm_page_lock_queues();
+ vm_page_replace(low_page, object, dst_offset);
+ /*
+ * keep the queue lock since we're going to
+ * need it immediately
+ */
+ delayed_unlock = 1;
+
+ dst_page = low_page;
+ /*
+ * vm_page_grablo returned the page marked
+ * BUSY... we don't need a PAGE_WAKEUP_DONE
+ * here, because we've never dropped the object lock
+ */
+ dst_page->busy = FALSE;
+ }
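+		/*
+		 * Take the page queues lock (unless we still hold it from the
+		 * page replacement above) and wire the page so it stays
+		 * resident while the caller performs I/O against it.
+		 */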
+ if (delayed_unlock == 0)
+ vm_page_lock_queues();
+ vm_page_wire(dst_page);
+
+ if (cntrl_flags & UPL_BLOCK_ACCESS) {
+ /*
+ * Mark the page "busy" to block any future page fault
+ * on this page. We'll also remove the mapping
+ * of all these pages before leaving this routine.
+ */
+ assert(!dst_page->fictitious);
+ dst_page->busy = TRUE;
+ }
+
+ if (upl_ptr) {
+ if (cntrl_flags & UPL_SET_LITE) {
+ int pg_num;
+ pg_num = (dst_offset-offset)/PAGE_SIZE;
+ lite_list[pg_num>>5] |= 1 << (pg_num & 31);
+ } else {
+ /*
+ * Convert the fictitious page to a
+ * private shadow of the real page.
+ */
+ assert(alias_page->fictitious);
+ alias_page->fictitious = FALSE;
+ alias_page->private = TRUE;
+ alias_page->pageout = TRUE;
+ alias_page->phys_page = dst_page->phys_page;
+ vm_page_wire(alias_page);
+
+ vm_page_insert(alias_page,
+ upl->map_object, size - xfer_size);
+ assert(!alias_page->wanted);
+ alias_page->busy = FALSE;
+ alias_page->absent = FALSE;
+ }
+
+ /* expect the page to be used */
+ dst_page->reference = TRUE;
+
+ if (!(cntrl_flags & UPL_COPYOUT_FROM))
+ dst_page->dirty = TRUE;
+ alias_page = NULL;
+
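+			/* track the highest physical page in the UPL */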
+ if (dst_page->phys_page > upl->highest_page)
+ upl->highest_page = dst_page->phys_page;
+
+ if (user_page_list) {
+ user_page_list[entry].phys_addr
+ = dst_page->phys_page;
+ user_page_list[entry].dirty =
+ dst_page->dirty;
+ user_page_list[entry].pageout =
+ dst_page->pageout;
+ user_page_list[entry].absent =
+ dst_page->absent;
+ user_page_list[entry].precious =
+ dst_page->precious;
+ }
+ }
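+		/*
+		 * Don't hold the page queues lock across too many iterations;
+		 * drop it periodically to let other threads make progress.
+		 */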
+ if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+ }
+ entry++;
+ dst_offset += PAGE_SIZE_64;
+ xfer_size -= PAGE_SIZE;
+ }
+ if (delayed_unlock)
+ vm_page_unlock_queues();
+
+	if (upl->flags & UPL_INTERNAL) {
+		if (page_list_count != NULL)
+			*page_list_count = 0;
+	} else if (page_list_count != NULL && *page_list_count > entry) {
+		*page_list_count = entry;
+	}
+
+ if (alias_page != NULL) {
+ vm_page_lock_queues();
+ vm_page_free(alias_page);
+ vm_page_unlock_queues();
+ }
+
+ vm_object_unlock(object);
+
+ if (cntrl_flags & UPL_BLOCK_ACCESS) {
+ /*
+ * We've marked all the pages "busy" so that future
+ * page faults will block.
+ * Now remove the mapping for these pages, so that they
+ * can't be accessed without causing a page fault.
+ */
+ vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
+ PMAP_NULL, 0, VM_PROT_NONE);
+ }
+
+ return KERN_SUCCESS;
+
+
+return_err:
+ if (delayed_unlock)
+ vm_page_unlock_queues();
+
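+	/*
+	 * Undo the wiring of the pages processed before the error occurred.
+	 */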
+ for (; offset < dst_offset; offset += PAGE_SIZE) {
+ dst_page = vm_page_lookup(object, offset);
+
+ if (dst_page == VM_PAGE_NULL)
+			panic("vm_object_iopl_request: Wired pages missing.\n");
+ vm_page_lock_queues();
+ vm_page_unwire(dst_page);
+ vm_page_unlock_queues();
+ VM_STAT(reactivations++);
+ }
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ upl_destroy(upl);
+
+ return ret;
+}
+
+
+kern_return_t
+upl_transpose(
+ upl_t upl1,
+ upl_t upl2)
+{
+ kern_return_t retval;
+ boolean_t upls_locked;
+ vm_object_t object1, object2;
+
+ if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ upls_locked = FALSE;
+
+ /*
+ * Since we need to lock both UPLs at the same time,
+ * avoid deadlocks by always taking locks in the same order.
+ */
+ if (upl1 < upl2) {
+ upl_lock(upl1);
+ upl_lock(upl2);
+ } else {
+ upl_lock(upl2);
+ upl_lock(upl1);
+ }
+ upls_locked = TRUE; /* the UPLs will need to be unlocked */
+
+ object1 = upl1->map_object;
+ object2 = upl2->map_object;
+
+ if (upl1->offset != 0 || upl2->offset != 0 ||
+ upl1->size != upl2->size) {
+ /*
+ * We deal only with full objects, not subsets.
+ * That's because we exchange the entire backing store info
+ * for the objects: pager, resident pages, etc... We can't do
+ * only part of it.
+ */
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+
+ /*
+	 * Transpose the VM objects' backing store.
+ */
+ retval = vm_object_transpose(object1, object2,
+ (vm_object_size_t) upl1->size);
+
+ if (retval == KERN_SUCCESS) {
+ /*
+ * Make each UPL point to the correct VM object, i.e. the
+ * object holding the pages that the UPL refers to...
+ */
+ upl1->map_object = object2;
+ upl2->map_object = object1;
+ }
+
+done:
+ /*
+ * Cleanup.
+ */
+ if (upls_locked) {
+ upl_unlock(upl1);
+ upl_unlock(upl2);
+ upls_locked = FALSE;
+ }
+
+ return retval;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ *
+ * Rationale: the user might have some encrypted data on disk (via
+ * FileVault or any other mechanism). That data is then decrypted in
+ * memory, which is safe as long as the machine is secure. But that
+ * decrypted data in memory could be paged out to disk by the default
+ * pager. The data would then be stored on disk in the clear (not encrypted)
+ * and it could be accessed by anyone who gets physical access to the
+ * disk (if the laptop or the disk gets stolen for example). This weakens
+ * the security offered by FileVault.
+ *
+ * Solution: the default pager will optionally request that all the
+ * pages it gathers for pageout be encrypted, via the UPL interfaces,
+ * before it sends this UPL to disk via the vnode_pageout() path.
+ *
+ * Notes:
+ *
+ * To avoid disrupting the VM LRU algorithms, we want to keep the
+ * clean-in-place mechanisms, which allow us to send some extra pages to
+ * swap (clustering) without actually removing them from the user's
+ * address space. We don't want the user to unknowingly access encrypted
+ * data, so we have to actually remove the encrypted pages from the page
+ * table. When the user accesses the data, the hardware will fail to
+ * locate the virtual page in its page table and will trigger a page
+ * fault. We can then decrypt the page and enter it in the page table
+ * again. Whenever we allow the user to access the contents of a page,
+ * we have to make sure it's not encrypted.
+ *
+ *
+ */
+/*
+ * ENCRYPTED SWAP:
+ * Reserve of virtual addresses in the kernel address space.
+ * We need to map the physical pages in the kernel, so that we
+ * can call the encryption/decryption routines with a kernel
+ * virtual address. We keep this pool of pre-allocated kernel
+ * virtual addresses so that we don't have to scan the kernel's
+ * virtual address space each time we need to encrypt or decrypt
+ * a physical page.
+ * It would be nice to be able to encrypt and decrypt in physical
+ * mode but that might not always be more efficient...
+ */
+decl_simple_lock_data(,vm_paging_lock)
+#define VM_PAGING_NUM_PAGES 64
+vm_map_offset_t vm_paging_base_address = 0;
+boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
+int vm_paging_max_index = 0;
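+/* statistics: pre-allocated pool usage vs. the slow mapping path */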
+unsigned long vm_paging_no_kernel_page = 0;
+unsigned long vm_paging_objects_mapped = 0;
+unsigned long vm_paging_pages_mapped = 0;
+unsigned long vm_paging_objects_mapped_slow = 0;
+unsigned long vm_paging_pages_mapped_slow = 0;
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_paging_map_object:
+ * Maps part of a VM object's pages in the kernel
+ * virtual address space, using the pre-allocated
+ * kernel virtual addresses, if possible.
+ * Context:
+ * The VM object is locked. This lock will get
+ * dropped and re-acquired though.
+ */
+kern_return_t
+vm_paging_map_object(
+ vm_map_offset_t *address,
+ vm_page_t page,
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_map_size_t *size)
+{
+ kern_return_t kr;
+ vm_map_offset_t page_map_offset;
+ vm_map_size_t map_size;
+ vm_object_offset_t object_offset;
+ int i;
+ vm_map_entry_t map_entry;
+
+
+ if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
+ /*
+ * Use one of the pre-allocated kernel virtual addresses
+ * and just enter the VM page in the kernel address space
+ * at that virtual address.
+ */
+ vm_object_unlock(object);
+ simple_lock(&vm_paging_lock);
+
+ if (vm_paging_base_address == 0) {
+ /*
+ * Initialize our pool of pre-allocated kernel
+ * virtual addresses.
+ */
+ simple_unlock(&vm_paging_lock);
+ page_map_offset = 0;
+ kr = vm_map_find_space(kernel_map,
+ &page_map_offset,
+ VM_PAGING_NUM_PAGES * PAGE_SIZE,
+ 0,
+ 0,
+ &map_entry);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_paging_map_object: "
+ "kernel_map full\n");
+ }
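+			/*
+			 * Back the reserved range with the kernel object so
+			 * that physical pages can be entered at these
+			 * addresses directly.
+			 */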
+ map_entry->object.vm_object = kernel_object;
+ map_entry->offset =
+ page_map_offset - VM_MIN_KERNEL_ADDRESS;
+ vm_object_reference(kernel_object);
+ vm_map_unlock(kernel_map);
+
+ simple_lock(&vm_paging_lock);
+ if (vm_paging_base_address != 0) {
+ /* someone raced us and won: undo */
+ simple_unlock(&vm_paging_lock);
+ kr = vm_map_remove(kernel_map,
+ page_map_offset,
+ page_map_offset +
+ (VM_PAGING_NUM_PAGES
+ * PAGE_SIZE),
+ VM_MAP_NO_FLAGS);
+ assert(kr == KERN_SUCCESS);
+ simple_lock(&vm_paging_lock);
+ } else {
+ vm_paging_base_address = page_map_offset;
+ }
+ }
+
+ /*
+ * Try and find an available kernel virtual address
+ * from our pre-allocated pool.
+ */
+ page_map_offset = 0;
+ for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
+ if (vm_paging_page_inuse[i] == FALSE) {
+ page_map_offset = vm_paging_base_address +
+ (i * PAGE_SIZE);
+ break;
+ }
+ }
+
+ if (page_map_offset != 0) {
+ /*
+ * We found a kernel virtual address;
+ * map the physical page to that virtual address.
+ */
+ if (i > vm_paging_max_index) {
+ vm_paging_max_index = i;
+ }
+ vm_paging_page_inuse[i] = TRUE;
+ simple_unlock(&vm_paging_lock);
+ if (page->no_isync == TRUE) {
+ pmap_sync_page_data_phys(page->phys_page);
+ }
+ assert(pmap_verify_free(page->phys_page));
+ PMAP_ENTER(kernel_pmap,
+ page_map_offset,
+ page,
+ VM_PROT_DEFAULT,
+ ((int) page->object->wimg_bits &
+ VM_WIMG_MASK),
+ TRUE);
+ vm_paging_objects_mapped++;
+ vm_paging_pages_mapped++;
+ *address = page_map_offset;
+ vm_object_lock(object);
+
+			/* all done and mapped, ready to use! */
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * We ran out of pre-allocated kernel virtual
+ * addresses. Just map the page in the kernel
+ * the slow and regular way.
+ */
+ vm_paging_no_kernel_page++;
+ simple_unlock(&vm_paging_lock);
+ vm_object_lock(object);
+ }
+
+ object_offset = vm_object_trunc_page(offset);
+ map_size = vm_map_round_page(*size);
+
+ /*
+ * Try and map the required range of the object
+ * in the kernel_map
+ */
+
+ /* don't go beyond the object's end... */
+ if (object_offset >= object->size) {
+ map_size = 0;
+ } else if (map_size > object->size - offset) {
+ map_size = object->size - offset;
+ }
+
+ vm_object_reference_locked(object); /* for the map entry */
+ vm_object_unlock(object);
+
+ kr = vm_map_enter(kernel_map,
+ address,
+ map_size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ object,
+ object_offset,
+ FALSE,
+ VM_PROT_DEFAULT,
+ VM_PROT_ALL,
+ VM_INHERIT_NONE);
+ if (kr != KERN_SUCCESS) {
+ *address = 0;
+ *size = 0;
+ vm_object_deallocate(object); /* for the map entry */
+ return kr;
+ }
+
+ *size = map_size;
+
+ /*
+ * Enter the mapped pages in the page table now.
+ */
+ vm_object_lock(object);
+ for (page_map_offset = 0;
+ map_size != 0;
+ map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
+ unsigned int cache_attr;
+
+ page = vm_page_lookup(object, offset + page_map_offset);
+ if (page == VM_PAGE_NULL) {
+ panic("vm_paging_map_object: no page !?");
+ }
+ if (page->no_isync == TRUE) {
+ pmap_sync_page_data_phys(page->phys_page);
+ }
+ cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
+
+ assert(pmap_verify_free(page->phys_page));
+ PMAP_ENTER(kernel_pmap,
+ *address + page_map_offset,
+ page,
+ VM_PROT_DEFAULT,
+ cache_attr,
+ TRUE);
+ }
+
+ vm_paging_objects_mapped_slow++;
+ vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_paging_unmap_object:
+ * Unmaps part of a VM object's pages from the kernel
+ * virtual address space.
+ * Context:
+ * The VM object is locked. This lock will get
+ * dropped and re-acquired though.
+ */
+void
+vm_paging_unmap_object(
+ vm_object_t object,
+ vm_map_offset_t start,
+ vm_map_offset_t end)
+{
+ kern_return_t kr;
+ int i;
+
+ if ((vm_paging_base_address == 0) ||
+ (start < vm_paging_base_address) ||
+ (end > (vm_paging_base_address
+ + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
+ /*
+ * We didn't use our pre-allocated pool of
+		 * kernel virtual addresses. Deallocate the
+ * virtual memory.
+ */
+ if (object != VM_OBJECT_NULL) {
+ vm_object_unlock(object);
+ }
+ kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
+ if (object != VM_OBJECT_NULL) {
+ vm_object_lock(object);
+ }
+ assert(kr == KERN_SUCCESS);
+ } else {
+ /*
+ * We used a kernel virtual address from our
+ * pre-allocated pool. Put it back in the pool
+ * for next time.
+ */
+ assert(end - start == PAGE_SIZE);
+ i = (start - vm_paging_base_address) >> PAGE_SHIFT;
+
+ /* undo the pmap mapping */
+ pmap_remove(kernel_pmap, start, end);
+
+ simple_lock(&vm_paging_lock);
+ vm_paging_page_inuse[i] = FALSE;
+ simple_unlock(&vm_paging_lock);
+ }
+}
+
+/*
+ * Encryption data.
+ * "iv" is the "initial vector". Ideally, we want to
+ * have a different one for each page we encrypt, so that
+ * crackers can't find encryption patterns too easily.
+ */
+#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
+boolean_t swap_crypt_ctx_initialized = FALSE;
+aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
+aes_ctx swap_crypt_ctx;
+const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
+
+#if DEBUG
+boolean_t swap_crypt_ctx_tested = FALSE;
+unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
+unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
+unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
+#endif /* DEBUG */
+
+extern u_long random(void);
+
+/*
+ * Initialize the encryption context: key and key size.
+ */
+void swap_crypt_ctx_initialize(void); /* forward */
+void
+swap_crypt_ctx_initialize(void)
+{
+ unsigned int i;
+
+ /*
+ * No need for locking to protect swap_crypt_ctx_initialized
+ * because the first use of encryption will come from the
+ * pageout thread (we won't pagein before there's been a pageout)
+ * and there's only one pageout thread.
+ */
+ if (swap_crypt_ctx_initialized == FALSE) {
+ for (i = 0;
+ i < (sizeof (swap_crypt_key) /
+ sizeof (swap_crypt_key[0]));
+ i++) {
+ swap_crypt_key[i] = random();
+ }
+ aes_encrypt_key((const unsigned char *) swap_crypt_key,
+ SWAP_CRYPT_AES_KEY_SIZE,
+ &swap_crypt_ctx.encrypt);
+ aes_decrypt_key((const unsigned char *) swap_crypt_key,
+ SWAP_CRYPT_AES_KEY_SIZE,
+ &swap_crypt_ctx.decrypt);
+ swap_crypt_ctx_initialized = TRUE;
+ }
+
+#if DEBUG
+ /*
+ * Validate the encryption algorithms.
+ */
+ if (swap_crypt_ctx_tested == FALSE) {
+ /* initialize */
+ for (i = 0; i < 4096; i++) {
+ swap_crypt_test_page_ref[i] = (char) i;
+ }
+ /* encrypt */
+ aes_encrypt_cbc(swap_crypt_test_page_ref,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_encrypt,
+ &swap_crypt_ctx.encrypt);
+ /* decrypt */
+ aes_decrypt_cbc(swap_crypt_test_page_encrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.decrypt);
+ /* compare result with original */
+ for (i = 0; i < 4096; i ++) {
+ if (swap_crypt_test_page_decrypt[i] !=
+ swap_crypt_test_page_ref[i]) {
+ panic("encryption test failed");
+ }
+ }
+
+ /* encrypt again */
+ aes_encrypt_cbc(swap_crypt_test_page_decrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.encrypt);
+ /* decrypt in place */
+ aes_decrypt_cbc(swap_crypt_test_page_decrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.decrypt);
+ for (i = 0; i < 4096; i ++) {
+ if (swap_crypt_test_page_decrypt[i] !=
+ swap_crypt_test_page_ref[i]) {
+ panic("in place encryption test failed");
+ }
+ }
+
+ swap_crypt_ctx_tested = TRUE;
+ }
+#endif /* DEBUG */
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_page_encrypt:
+ * Encrypt the given page, for secure paging.
+ * The page might already be mapped at kernel virtual
+ * address "kernel_mapping_offset". Otherwise, we need
+ * to map it.
+ *
+ * Context:
+ * The page's object is locked, but this lock will be released
+ * and re-acquired.
+ * The page is busy and not accessible by users (not entered in any pmap).
+ */
+void
+vm_page_encrypt(
+ vm_page_t page,
+ vm_map_offset_t kernel_mapping_offset)
+{
+ int clear_refmod = 0;
+ kern_return_t kr;
+ boolean_t page_was_referenced;
+ boolean_t page_was_modified;
+ vm_map_size_t kernel_mapping_size;
+ vm_offset_t kernel_vaddr;
+ union {
+ unsigned char aes_iv[AES_BLOCK_SIZE];
+ struct {
+ memory_object_t pager_object;
+ vm_object_offset_t paging_offset;
+ } vm;
+ } encrypt_iv;
+
+ if (! vm_pages_encrypted) {
+ vm_pages_encrypted = TRUE;
+ }
+
+ assert(page->busy);
+ assert(page->dirty || page->precious);
+
+ if (page->encrypted) {
+ /*
+ * Already encrypted: no need to do it again.
+ */
+ vm_page_encrypt_already_encrypted_counter++;
+ return;
+ }
+ ASSERT_PAGE_DECRYPTED(page);
+
+ /*
+ * Gather the "reference" and "modified" status of the page.
+ * We'll restore these values after the encryption, so that
+ * the encryption is transparent to the rest of the system
+ * and doesn't impact the VM's LRU logic.
+ */
+ page_was_referenced =
+ (page->reference || pmap_is_referenced(page->phys_page));
+ page_was_modified =
+ (page->dirty || pmap_is_modified(page->phys_page));
+
+ if (kernel_mapping_offset == 0) {
+ /*
+ * The page hasn't already been mapped in kernel space
+ * by the caller. Map it now, so that we can access
+ * its contents and encrypt them.
+ */
+ kernel_mapping_size = PAGE_SIZE;
+ kr = vm_paging_map_object(&kernel_mapping_offset,
+ page,
+ page->object,
+ page->offset,
+ &kernel_mapping_size);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_page_encrypt: "
+ "could not map page in kernel: 0x%x\n",
+ kr);
+ }
+ } else {
+ kernel_mapping_size = 0;
+ }
+ kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
+
+ if (swap_crypt_ctx_initialized == FALSE) {
+ swap_crypt_ctx_initialize();
+ }
+ assert(swap_crypt_ctx_initialized);
+
+ /*
+ * Prepare an "initial vector" for the encryption.
+ * We use the "pager" and the "paging_offset" for that
+ * page to obfuscate the encrypted data a bit more and
+ * prevent crackers from finding patterns that they could
+ * use to break the key.
+ */
+ bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
+ encrypt_iv.vm.pager_object = page->object->pager;
+ encrypt_iv.vm.paging_offset =
+ page->object->paging_offset + page->offset;
+
+ vm_object_unlock(page->object);
+
+ /* encrypt the "initial vector" */
+ aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
+ swap_crypt_null_iv,
+ 1,
+ &encrypt_iv.aes_iv[0],
+ &swap_crypt_ctx.encrypt);
+
+ /*
+ * Encrypt the page.
+ */
+ aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
+ &encrypt_iv.aes_iv[0],
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ (unsigned char *) kernel_vaddr,
+ &swap_crypt_ctx.encrypt);
+
+ vm_page_encrypt_counter++;
+
+ vm_object_lock(page->object);
+
+ /*
+ * Unmap the page from the kernel's address space,
+ * if we had to map it ourselves. Otherwise, let
+ * the caller undo the mapping if needed.
+ */
+ if (kernel_mapping_size != 0) {
+ vm_paging_unmap_object(page->object,
+ kernel_mapping_offset,
+ kernel_mapping_offset + kernel_mapping_size);
}
- vm_object_lock(object);
- if (!object->phys_contiguous) {
- /* Protect user space from future COW operations */
- object->true_share = TRUE;
- if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
- object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+ /*
+ * Restore the "reference" and "modified" bits.
+ * This should clean up any impact the encryption had
+ * on them.
+ */
+ if (! page_was_referenced) {
+ clear_refmod |= VM_MEM_REFERENCED;
+ page->reference = FALSE;
}
+ if (! page_was_modified) {
+ clear_refmod |= VM_MEM_MODIFIED;
+ page->dirty = FALSE;
+ }
+ if (clear_refmod)
+ pmap_clear_refmod(page->phys_page, clear_refmod);
-#ifdef UBC_DEBUG
- if(upl_ptr)
- queue_enter(&object->uplq, upl, upl_t, uplq);
-#endif /* UBC_DEBUG */
- vm_object_paging_begin(object);
- entry = 0;
- while (xfer_size) {
- if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
- vm_object_unlock(object);
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
- vm_object_lock(object);
+ page->encrypted = TRUE;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_page_decrypt:
+ * Decrypt the given page.
+ * The page might already be mapped at kernel virtual
+ * address "kernel_mapping_offset". Otherwise, we need
+ * to map it.
+ *
+ * Context:
+ * The page's VM object is locked but will be unlocked and relocked.
+ * The page is busy and not accessible by users (not entered in any pmap).
+ */
+void
+vm_page_decrypt(
+ vm_page_t page,
+ vm_map_offset_t kernel_mapping_offset)
+{
+ int clear_refmod = 0;
+ kern_return_t kr;
+ vm_map_size_t kernel_mapping_size;
+ vm_offset_t kernel_vaddr;
+ boolean_t page_was_referenced;
+ union {
+ unsigned char aes_iv[AES_BLOCK_SIZE];
+ struct {
+ memory_object_t pager_object;
+ vm_object_offset_t paging_offset;
+ } vm;
+ } decrypt_iv;
+
+ assert(page->busy);
+ assert(page->encrypted);
+
+ /*
+ * Gather the "reference" status of the page.
+ * We'll restore its value after the decryption, so that
+ * the decryption is transparent to the rest of the system
+ * and doesn't impact the VM's LRU logic.
+ */
+ page_was_referenced =
+ (page->reference || pmap_is_referenced(page->phys_page));
+
+ if (kernel_mapping_offset == 0) {
+ /*
+ * The page hasn't already been mapped in kernel space
+ * by the caller. Map it now, so that we can access
+ * its contents and decrypt them.
+ */
+ kernel_mapping_size = PAGE_SIZE;
+ kr = vm_paging_map_object(&kernel_mapping_offset,
+ page,
+ page->object,
+ page->offset,
+ &kernel_mapping_size);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_page_decrypt: "
+			      "could not map page in kernel: 0x%x\n", kr);
}
- dst_page = vm_page_lookup(object, dst_offset);
- if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
- (dst_page->unusual && (dst_page->error ||
- dst_page->restart || dst_page->absent ||
- dst_page->fictitious ||
- prot & dst_page->page_lock))) {
- vm_fault_return_t result;
- do {
- vm_page_t top_page;
- kern_return_t error_code;
- int interruptible;
+ } else {
+ kernel_mapping_size = 0;
+ }
+ kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
- vm_object_offset_t lo_offset = offset;
- vm_object_offset_t hi_offset = offset + size;
+ assert(swap_crypt_ctx_initialized);
+ /*
+ * Prepare an "initial vector" for the decryption.
+ * It has to be the same as the "initial vector" we
+ * used to encrypt that page.
+ */
+ bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
+ decrypt_iv.vm.pager_object = page->object->pager;
+ decrypt_iv.vm.paging_offset =
+ page->object->paging_offset + page->offset;
+ vm_object_unlock(page->object);
- if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
- interruptible = THREAD_ABORTSAFE;
- } else {
- interruptible = THREAD_UNINT;
- }
+ /* encrypt the "initial vector" */
+ aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
+ swap_crypt_null_iv,
+ 1,
+ &decrypt_iv.aes_iv[0],
+ &swap_crypt_ctx.encrypt);
- result = vm_fault_page(object, dst_offset,
- prot | VM_PROT_WRITE, FALSE,
- interruptible,
- lo_offset, hi_offset,
- VM_BEHAVIOR_SEQUENTIAL,
- &prot, &dst_page, &top_page,
- (int *)0,
- &error_code, FALSE, FALSE, NULL, 0);
+ /*
+ * Decrypt the page.
+ */
+ aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
+ &decrypt_iv.aes_iv[0],
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ (unsigned char *) kernel_vaddr,
+ &swap_crypt_ctx.decrypt);
+ vm_page_decrypt_counter++;
- switch(result) {
- case VM_FAULT_SUCCESS:
+ vm_object_lock(page->object);
- PAGE_WAKEUP_DONE(dst_page);
+ /*
+ * Unmap the page from the kernel's address space,
+ * if we had to map it ourselves. Otherwise, let
+ * the caller undo the mapping if needed.
+ */
+ if (kernel_mapping_size != 0) {
+ vm_paging_unmap_object(page->object,
+ kernel_vaddr,
+ kernel_vaddr + PAGE_SIZE);
+ }
- /*
- * Release paging references and
- * top-level placeholder page, if any.
- */
+ /*
+ * After decryption, the page is actually clean.
+ * It was encrypted as part of paging, which "cleans"
+ * the "dirty" pages.
+	 * No one could access it after it was encrypted
+ * and the decryption doesn't count.
+ */
+ page->dirty = FALSE;
+ clear_refmod = VM_MEM_MODIFIED;
- if(top_page != VM_PAGE_NULL) {
- vm_object_t local_object;
- local_object =
- top_page->object;
- if(top_page->object
- != dst_page->object) {
- vm_object_lock(
- local_object);
- VM_PAGE_FREE(top_page);
- vm_object_paging_end(
- local_object);
- vm_object_unlock(
- local_object);
- } else {
- VM_PAGE_FREE(top_page);
- vm_object_paging_end(
- local_object);
- }
- }
+ /* restore the "reference" bit */
+ if (! page_was_referenced) {
+ page->reference = FALSE;
+ clear_refmod |= VM_MEM_REFERENCED;
+ }
+ pmap_clear_refmod(page->phys_page, clear_refmod);
- break;
-
-
- case VM_FAULT_RETRY:
- vm_object_lock(object);
- vm_object_paging_begin(object);
- break;
+ page->encrypted = FALSE;
- case VM_FAULT_FICTITIOUS_SHORTAGE:
- vm_page_more_fictitious();
- vm_object_lock(object);
- vm_object_paging_begin(object);
- break;
+ /*
+ * We've just modified the page's contents via the data cache and part
+ * of the new contents might still be in the cache and not yet in RAM.
+ * Since the page is now available and might get gathered in a UPL to
+ * be part of a DMA transfer from a driver that expects the memory to
+ * be coherent at this point, we have to flush the data cache.
+ */
+ pmap_sync_page_attributes_phys(page->phys_page);
+ /*
+ * Since the page is not mapped yet, some code might assume that it
+ * doesn't need to invalidate the instruction cache when writing to
+ * that page. That code relies on "no_isync" being set, so that the
+	 * caches get synchronized when the page is first mapped. So we need
+ * to set "no_isync" here too, despite the fact that we just
+ * synchronized the caches above...
+ */
+ page->no_isync = TRUE;
+}
- case VM_FAULT_MEMORY_SHORTAGE:
- if (vm_page_wait(interruptible)) {
- vm_object_lock(object);
- vm_object_paging_begin(object);
- break;
- }
- /* fall thru */
+unsigned long upl_encrypt_upls = 0;
+unsigned long upl_encrypt_pages = 0;
- case VM_FAULT_INTERRUPTED:
- error_code = MACH_SEND_INTERRUPTED;
- case VM_FAULT_MEMORY_ERROR:
- ret = (error_code ? error_code:
- KERN_MEMORY_ERROR);
- vm_object_lock(object);
- for(; offset < dst_offset;
- offset += PAGE_SIZE) {
- dst_page = vm_page_lookup(
- object, offset);
- if(dst_page == VM_PAGE_NULL)
- panic("vm_object_iopl_request: Wired pages missing. \n");
- vm_page_lock_queues();
- vm_page_unwire(dst_page);
- vm_page_unlock_queues();
- VM_STAT(reactivations++);
- }
- vm_object_unlock(object);
- upl_destroy(upl);
- return ret;
- }
- } while ((result != VM_FAULT_SUCCESS)
- || (result == VM_FAULT_INTERRUPTED));
- }
+/*
+ * ENCRYPTED SWAP:
+ *
+ * upl_encrypt:
+ * Encrypts all the pages in the UPL, within the specified range.
+ *
+ */
+void
+upl_encrypt(
+ upl_t upl,
+ upl_offset_t crypt_offset,
+ upl_size_t crypt_size)
+{
+ upl_size_t upl_size;
+ upl_offset_t upl_offset;
+ vm_object_t upl_object;
+ vm_page_t page;
+ vm_object_t shadow_object;
+ vm_object_offset_t shadow_offset;
+ vm_object_offset_t paging_offset;
+ vm_object_offset_t base_offset;
- vm_page_lock_queues();
- vm_page_wire(dst_page);
- vm_page_unlock_queues();
+ upl_encrypt_upls++;
+ upl_encrypt_pages += crypt_size / PAGE_SIZE;
- if(upl_ptr) {
+ upl_lock(upl);
- vm_page_lock_queues();
- if(cntrl_flags & UPL_SET_LITE) {
- int pg_num;
- pg_num = (dst_offset-offset)/PAGE_SIZE;
- lite_list[pg_num>>5] |= 1 << (pg_num & 31);
- } else {
- /*
- * Convert the fictitious page to a
- * private shadow of the real page.
- */
- assert(alias_page->fictitious);
- alias_page->fictitious = FALSE;
- alias_page->private = TRUE;
- alias_page->pageout = TRUE;
- alias_page->phys_page = dst_page->phys_page;
- vm_page_wire(alias_page);
+ upl_object = upl->map_object;
+ upl_offset = upl->offset;
+ upl_size = upl->size;
- vm_page_insert(alias_page,
- upl->map_object, size - xfer_size);
- assert(!alias_page->wanted);
- alias_page->busy = FALSE;
- alias_page->absent = FALSE;
- }
+ upl_unlock(upl);
- /* expect the page to be used */
- dst_page->reference = TRUE;
- if (!(cntrl_flags & UPL_COPYOUT_FROM))
- dst_page->dirty = TRUE;
- alias_page = NULL;
+ vm_object_lock(upl_object);
- if(user_page_list) {
- user_page_list[entry].phys_addr
- = dst_page->phys_page;
- user_page_list[entry].dirty =
- dst_page->dirty;
- user_page_list[entry].pageout =
- dst_page->pageout;
- user_page_list[entry].absent =
- dst_page->absent;
- user_page_list[entry].precious =
- dst_page->precious;
- }
- vm_page_unlock_queues();
- }
- entry++;
- dst_offset += PAGE_SIZE_64;
- xfer_size -= PAGE_SIZE;
+ /*
+ * Find the VM object that contains the actual pages.
+ */
+ if (upl_object->pageout) {
+ shadow_object = upl_object->shadow;
+ /*
+ * The offset in the shadow object is actually also
+ * accounted for in upl->offset. It possibly shouldn't be
+ * this way, but for now don't account for it twice.
+ */
+ shadow_offset = 0;
+ assert(upl_object->paging_offset == 0); /* XXX ? */
+ vm_object_lock(shadow_object);
+ } else {
+ shadow_object = upl_object;
+ shadow_offset = 0;
}
- if (upl->flags & UPL_INTERNAL) {
- if(page_list_count != NULL)
- *page_list_count = 0;
- } else if (*page_list_count > entry) {
- if(page_list_count != NULL)
- *page_list_count = entry;
+ paging_offset = shadow_object->paging_offset;
+ vm_object_paging_begin(shadow_object);
+
+ if (shadow_object != upl_object) {
+ vm_object_unlock(shadow_object);
}
+ vm_object_unlock(upl_object);
- if(alias_page != NULL) {
- vm_page_lock_queues();
- vm_page_free(alias_page);
- vm_page_unlock_queues();
+ base_offset = shadow_offset;
+ base_offset += upl_offset;
+ base_offset += crypt_offset;
+ base_offset -= paging_offset;
+ /*
+ * Unmap the pages, so that nobody can continue accessing them while
+ * they're encrypted. After that point, all accesses to these pages
+ * will cause a page fault and block while the page is being encrypted
+ * (busy). After the encryption completes, any access will cause a
+ * page fault and the page gets decrypted at that time.
+ */
+ assert(crypt_offset + crypt_size <= upl_size);
+ vm_object_pmap_protect(shadow_object,
+ base_offset,
+ (vm_object_size_t)crypt_size,
+ PMAP_NULL,
+ 0,
+ VM_PROT_NONE);
+
+ /* XXX FBDP could the object have changed significantly here ? */
+ vm_object_lock(shadow_object);
+
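+	/* encrypt each page of the requested range, in place */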
+ for (upl_offset = 0;
+ upl_offset < crypt_size;
+ upl_offset += PAGE_SIZE) {
+ page = vm_page_lookup(shadow_object,
+ base_offset + upl_offset);
+ if (page == VM_PAGE_NULL) {
+ panic("upl_encrypt: "
+ "no page for (obj=%p,off=%lld+%d)!\n",
+ shadow_object,
+ base_offset,
+ upl_offset);
+ }
+ vm_page_encrypt(page, 0);
}
- vm_object_unlock(object);
- return KERN_SUCCESS;
+ vm_object_paging_end(shadow_object);
+ vm_object_unlock(shadow_object);
}
+
vm_size_t
-upl_get_internal_pagelist_offset()
+upl_get_internal_pagelist_offset(void)
{
return sizeof(struct upl);
}
-void
-upl_set_dirty(
- upl_t upl)
-{
- upl->flags |= UPL_CLEAR_DIRTY;
-}
-
void
upl_clear_dirty(
- upl_t upl)
+ upl_t upl,
+ boolean_t value)
{
- upl->flags &= ~UPL_CLEAR_DIRTY;
+ if (value) {
+ upl->flags |= UPL_CLEAR_DIRTY;
+ } else {
+ upl->flags &= ~UPL_CLEAR_DIRTY;
+ }
}
{
return(UPL_VALID_PAGE(upl, index));
}
-vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
+ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
{
- return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
+ return(UPL_PHYS_PAGE(upl, index));
}
void
if(m->pageout) pgopages++;
if(m->precious) precpages++;
+ assert(m->object != kernel_object);
m = (vm_page_t) queue_next(&m->pageq);
if (m ==(vm_page_t )0) break;
if(m->pageout) pgopages++;
if(m->precious) precpages++;
+ assert(m->object != kernel_object);
m = (vm_page_t) queue_next(&m->pageq);
if (m ==(vm_page_t )0) break;
if(m->pageout) pgopages++;
if(m->precious) precpages++;
+ assert(m->object != kernel_object);
m = (vm_page_t) queue_next(&m->pageq);
if(m == (vm_page_t )0) break;
}
#endif /* MACH_BSD */
-#ifdef UBC_DEBUG
+ppnum_t upl_get_highest_page(
+ upl_t upl)
+{
+ return upl->highest_page;
+}
+
+#ifdef UPL_DEBUG
kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
{
upl->ubc_alias1 = alias1;
*al2 = upl->ubc_alias2;
return KERN_SUCCESS;
}
-#endif /* UBC_DEBUG */
+#endif /* UPL_DEBUG */
#include <vm/vm_print.h>
#define printf kdbprintf
-extern int db_indent;
void db_pageout(void);
void
db_vm(void)
{
- extern int vm_page_gobble_count;
iprintf("VM Statistics:\n");
db_indent += 2;
vm_page_free_count);
printf(" wire %5d gobbl %5d\n",
vm_page_wire_count, vm_page_gobble_count);
- iprintf("laund %5d\n",
- vm_page_laundry_count);
db_indent -= 2;
iprintf("target:\n");
db_indent += 2;
vm_page_free_target);
printf(" resrv %5d\n", vm_page_free_reserved);
db_indent -= 2;
-
- iprintf("burst:\n");
- db_indent += 2;
- iprintf("max %5d min %5d wait %5d empty %5d\n",
- vm_pageout_burst_max, vm_pageout_burst_min,
- vm_pageout_burst_wait, vm_pageout_empty_wait);
- db_indent -= 2;
iprintf("pause:\n");
- db_indent += 2;
- iprintf("count %5d max %5d\n",
- vm_pageout_pause_count, vm_pageout_pause_max);
-#if MACH_COUNTERS
- iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
-#endif /* MACH_COUNTERS */
- db_indent -= 2;
db_pageout();
db_indent -= 2;
}
-void
-db_pageout(void)
-{
#if MACH_COUNTERS
- extern int c_laundry_pages_freed;
+extern int c_laundry_pages_freed;
#endif /* MACH_COUNTERS */
+void
+db_pageout(void)
+{
iprintf("Pageout Statistics:\n");
db_indent += 2;
iprintf("active %5d inactv %5d\n",
db_indent -= 2;
}
-#if MACH_CLUSTER_STATS
-unsigned long vm_pageout_cluster_dirtied = 0;
-unsigned long vm_pageout_cluster_cleaned = 0;
-unsigned long vm_pageout_cluster_collisions = 0;
-unsigned long vm_pageout_cluster_clusters = 0;
-unsigned long vm_pageout_cluster_conversions = 0;
-unsigned long vm_pageout_target_collisions = 0;
-unsigned long vm_pageout_target_page_dirtied = 0;
-unsigned long vm_pageout_target_page_freed = 0;
-#define CLUSTER_STAT(clause) clause
-#else /* MACH_CLUSTER_STATS */
-#define CLUSTER_STAT(clause)
-#endif /* MACH_CLUSTER_STATS */
-
#endif /* MACH_KDB */