-
/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
* Paging File Management.
*/
+#include <mach/host_priv.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>
-#include "default_pager_internal.h"
+#include <mach/upl.h>
+#include <default_pager/default_pager_internal.h>
#include <default_pager/default_pager_alerts.h>
+#include <default_pager/default_pager_object_server.h>
+
+#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
+
+#include <kern/kern_types.h>
+#include <kern/host.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
+
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
-/* CDY CDY */
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_protos.h>
+
+
+/* todo - need large internal object support */
/*
* ALLOC_STRIDE... the maximum number of bytes allocated from
#define ALLOC_STRIDE (1024 * 1024 * 1024)
int physical_transfer_cluster_count = 0;
-#define VM_SUPER_CLUSTER 0x20000
-#define VM_SUPER_PAGES 32
+#define VM_SUPER_CLUSTER 0x40000
+#define VM_SUPER_PAGES (VM_SUPER_CLUSTER / PAGE_SIZE)
/*
* 0 means no shift to pages, so == 1 page/cluster. 1 would mean
* 2 pages/cluster, 2 means 4 pages/cluster, and so on.
*/
+#define VSTRUCT_MIN_CLSHIFT 0
+
#define VSTRUCT_DEF_CLSHIFT 2
-int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;
+int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
+
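+/*
+ * Illustrative arithmetic (editor's note, assuming 4KB pages): with
+ * vstruct_def_clshift == VSTRUCT_DEF_CLSHIFT (2), each cluster covers
+ * 1 << 2 = 4 pages, i.e. 16KB of backing store, and a VM_SUPER_CLUSTER
+ * transfer of 0x40000 bytes (256KB, VM_SUPER_PAGES == 64 pages) spans
+ * 16 such clusters.
+ */
+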
/* statistics */
unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
#define VS_ASYNC_REUSE 1
struct vs_async *vs_async_free_list;
-mutex_t default_pager_async_lock; /* Protects globals above */
+lck_mtx_t default_pager_async_lock; /* Protects globals above */
int vs_alloc_async_failed = 0; /* statistics */
#define VS_ALLOC_ASYNC() vs_alloc_async()
#define VS_FREE_ASYNC(vsa) vs_free_async(vsa)
-#define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock)
-#define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock)
-#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, \
- ETAP_IO_DEV_PAGEH)
+#define VS_ASYNC_LOCK() lck_mtx_lock(&default_pager_async_lock)
+#define VS_ASYNC_UNLOCK() lck_mtx_unlock(&default_pager_async_lock)
+#define VS_ASYNC_LOCK_INIT() lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define VS_ASYNC_LOCK_DESTROY() lck_mtx_destroy(&default_pager_async_lock, &default_pager_lck_grp)
#define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock)
/*
* Paging Space Hysteresis triggers and the target notification port
*
*/
-
+unsigned int dp_pages_free_drift_count = 0;
+unsigned int dp_pages_free_drifted_max = 0;
unsigned int minimum_pages_remaining = 0;
unsigned int maximum_pages_free = 0;
ipc_port_t min_pages_trigger_port = NULL;
ipc_port_t max_pages_trigger_port = NULL;
+#if CONFIG_FREEZE
+boolean_t use_emergency_swap_file_first = TRUE;
+#else
+boolean_t use_emergency_swap_file_first = FALSE;
+#endif
boolean_t bs_low = FALSE;
int backing_store_release_trigger_disable = 0;
-
+boolean_t backing_store_stop_compaction = FALSE;
+boolean_t backing_store_abort_compaction = FALSE;
+/* Have we decided if swap needs to be encrypted yet ? */
+boolean_t dp_encryption_inited = FALSE;
+/* Should we encrypt swap ? */
+boolean_t dp_encryption = FALSE;
+
+boolean_t dp_isssd = FALSE;
/*
* Object sizes are rounded up to the next power of 2,
/*
* List of all backing store and segments.
*/
+MACH_PORT_FACE emergency_segment_backing_store;
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
-mutex_t paging_segments_lock;
+lck_mtx_t paging_segments_lock;
int paging_segment_max = 0;
int paging_segment_count = 0;
int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
* likely to be deprecated.
*/
unsigned int dp_pages_free = 0;
+unsigned int dp_pages_reserve = 0;
unsigned int cluster_transfer_minimum = 100;
-kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int); /* forward */
-kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
+/*
+ * Trim state
+ */
+struct ps_vnode_trim_data {
+ struct vnode *vp;
+ dp_offset_t offset;
+ dp_size_t length;
+};
+
+/* forward declarations */
+kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int); /* forward */
+kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int); /* forward */
+default_pager_thread_t *get_read_buffer( void );
+kern_return_t ps_vstruct_transfer_from_segment(
+ vstruct_t vs,
+ paging_segment_t segment,
+ upl_t upl);
+kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
+kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */
+kern_return_t vs_cluster_transfer(
+ vstruct_t vs,
+ dp_offset_t offset,
+ dp_size_t cnt,
+ upl_t upl);
+vs_map_t vs_get_map_entry(
+ vstruct_t vs,
+ dp_offset_t offset);
+
+kern_return_t
+default_pager_backing_store_delete_internal( MACH_PORT_FACE );
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length);
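+
+/*
+ * Editor's note, inferred from the call sites below (ps_dealloc_vsmap and
+ * ps_clunmap): the ps_vnode_trim_* helpers batch contiguous freed clusters
+ * into a single vnode trim request.  A caller starts a run with
+ * ps_vnode_trim_init(), extends it with ps_vnode_trim_more() for each
+ * cluster it deallocates, and flushes the accumulated range with
+ * ps_vnode_trim_now() when it hits a hole or finishes its walk.
+ */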
default_pager_thread_t *
-get_read_buffer()
+get_read_buffer( void )
{
int i;
return dpt_array[i];
}
}
- assert_wait(&dpt_array, THREAD_UNINT);
- DPT_UNLOCK(dpt_lock);
- thread_block((void(*)(void))0);
+ DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
}
}
void
bs_global_info(
- vm_size_t *totalp,
- vm_size_t *freep)
+ uint64_t *totalp,
+ uint64_t *freep)
{
- vm_size_t pages_total, pages_free;
+ uint64_t pages_total, pages_free;
paging_segment_t ps;
int i;
*/
pages_total += ps->ps_pgnum;
pages_free += ps->ps_clcount << ps->ps_clshift;
- DEBUG(DEBUG_BS_INTERNAL,
- ("segment #%d: %d total, %d free\n",
- i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
+ DP_DEBUG(DEBUG_BS_INTERNAL,
+ ("segment #%d: %d total, %d free\n",
+ i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
}
*totalp = pages_total;
*freep = pages_free;
if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
- if ((port == MACH_PORT_NULL))
+ if (port == MACH_PORT_NULL)
return BACKING_STORE_NULL;
BSL_LOCK();
void
backing_store_add(
- backing_store_t bs)
+ __unused backing_store_t bs)
{
- MACH_PORT_FACE port = bs->bs_port;
- MACH_PORT_FACE pset = default_pager_default_set;
+// MACH_PORT_FACE port = bs->bs_port;
+// MACH_PORT_FACE pset = default_pager_default_set;
kern_return_t kr = KERN_SUCCESS;
if (kr != KERN_SUCCESS)
{
backing_store_t bs;
MACH_PORT_FACE port;
- kern_return_t kr;
+// kern_return_t kr;
struct vstruct_alias *alias_struct;
if (pager != default_pager_object)
ipc_port_make_send(port);
assert (port != IP_NULL);
- DEBUG(DEBUG_BS_EXTERNAL,
- ("priority=%d clsize=%d bs_port=0x%x\n",
- priority, clsize, (int) backing_store));
+ DP_DEBUG(DEBUG_BS_EXTERNAL,
+ ("priority=%d clsize=%d bs_port=0x%x\n",
+ priority, clsize, (int) backing_store));
alias_struct = (struct vstruct_alias *)
kalloc(sizeof (struct vstruct_alias));
if(alias_struct != NULL) {
alias_struct->vs = (struct vstruct *)bs;
- alias_struct->name = ISVS;
- port->alias = (int) alias_struct;
+ alias_struct->name = &default_pager_ops;
+ port->ip_alias = (uintptr_t) alias_struct;
}
else {
ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
- kfree((vm_offset_t)bs, sizeof (struct backing_store));
+
+ BS_LOCK_DESTROY(bs);
+ kfree(bs, sizeof (struct backing_store));
+
return KERN_RESOURCE_SHORTAGE;
}
priority = BS_MINPRI;
bs->bs_priority = priority;
- bs->bs_clsize = bs_get_global_clsize(atop(clsize));
+ bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
BSL_LOCK();
queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
basic->bs_pages_out_fail= bs->bs_pages_out_fail;
basic->bs_priority = bs->bs_priority;
- basic->bs_clsize = ptoa(bs->bs_clsize); /* in bytes */
+ basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */
BS_UNLOCK(bs);
}
int ps_delete(paging_segment_t); /* forward */
+boolean_t current_thread_aborted(void);
int
ps_delete(
while(backing_store_release_trigger_disable != 0) {
- assert_wait((event_t)
- &backing_store_release_trigger_disable,
- THREAD_UNINT);
- VSL_UNLOCK();
- thread_block((void (*)(void)) 0);
- VSL_LOCK();
+ VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);
}
/* we will choose instead to hold a send right */
if ((vs_count != 0) && (vs != NULL))
vs->vs_async_pending += 1; /* hold parties calling */
/* vs_async_wait */
+
+ if (bs_low == FALSE)
+ backing_store_abort_compaction = FALSE;
+
VS_UNLOCK(vs);
VSL_UNLOCK();
while((vs_count != 0) && (vs != NULL)) {
error = KERN_FAILURE;
else {
vm_object_t transfer_object;
- int count;
+ unsigned int count;
upl_t upl;
+ int upl_flags;
- transfer_object = vm_object_allocate(VM_SUPER_CLUSTER);
+ transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
count = 0;
+ upl_flags = (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
+ UPL_SET_LITE | UPL_SET_INTERNAL);
+ if (dp_encryption) {
+ /* mark the pages as "encrypted" when they come in */
+ upl_flags |= UPL_ENCRYPT;
+ }
error = vm_object_upl_request(transfer_object,
(vm_object_offset_t)0, VM_SUPER_CLUSTER,
- &upl, NULL, &count,
- UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
- | UPL_SET_INTERNAL);
+ &upl, NULL, &count, upl_flags);
+
if(error == KERN_SUCCESS) {
-#ifndef ubc_sync_working
- upl_commit(upl, NULL);
- upl_deallocate(upl);
- error = ps_vstruct_transfer_from_segment(
- vs, ps, transfer_object);
-#else
error = ps_vstruct_transfer_from_segment(
vs, ps, upl);
- upl_commit(upl, NULL);
+ upl_commit(upl, NULL, 0);
upl_deallocate(upl);
-#endif
- vm_object_deallocate(transfer_object);
} else {
- vm_object_deallocate(transfer_object);
error = KERN_FAILURE;
}
+ vm_object_deallocate(transfer_object);
}
- if(error) {
+ if(error || current_thread_aborted()) {
VS_LOCK(vs);
vs->vs_async_pending -= 1; /* release vs_async_wait */
if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
VSL_LOCK();
while(backing_store_release_trigger_disable != 0) {
- assert_wait((event_t)
- &backing_store_release_trigger_disable,
- THREAD_UNINT);
- VSL_UNLOCK();
- thread_block((void (*)(void)) 0);
- VSL_LOCK();
+ VSL_SLEEP(&backing_store_release_trigger_disable,
+ THREAD_UNINT);
}
next_vs = (vstruct_t) queue_next(&(vs->vs_links));
kern_return_t
-default_pager_backing_store_delete(
+default_pager_backing_store_delete_internal(
MACH_PORT_FACE backing_store)
{
backing_store_t bs;
paging_segment_t ps;
int error;
int interim_pages_removed = 0;
- kern_return_t kr;
+ boolean_t dealing_with_emergency_segment = ( backing_store == emergency_segment_backing_store );
if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
return KERN_INVALID_ARGUMENT;
-#if 0
- /* not implemented */
- BS_UNLOCK(bs);
- return KERN_FAILURE;
-#endif
-
- restart:
+restart:
PSL_LOCK();
error = KERN_SUCCESS;
for (i = 0; i <= paging_segment_max; i++) {
ps = paging_segments[i];
if (ps != PAGING_SEGMENT_NULL &&
ps->ps_bs == bs &&
- ! ps->ps_going_away) {
+ ! IS_PS_GOING_AWAY(ps)) {
PS_LOCK(ps);
+
+ if( IS_PS_GOING_AWAY(ps) || !IS_PS_OK_TO_USE(ps)) {
+ /*
+			 * Someone is already busy reclaiming this paging segment.
+			 * If it's the emergency segment we are looking at, then check
+			 * that someone has not already recovered it and set the right
+			 * state, i.e. online but not activated.
+ */
+ PS_UNLOCK(ps);
+ continue;
+ }
+
/* disable access to this segment */
- ps->ps_going_away = TRUE;
+ ps->ps_state &= ~PS_CAN_USE;
+ ps->ps_state |= PS_GOING_AWAY;
PS_UNLOCK(ps);
/*
* The "ps" segment is "off-line" now,
ps = paging_segments[i];
if (ps != PAGING_SEGMENT_NULL &&
ps->ps_bs == bs &&
- ps->ps_going_away) {
+ IS_PS_GOING_AWAY(ps)) {
PS_LOCK(ps);
+
+ if( !IS_PS_GOING_AWAY(ps)) {
+ PS_UNLOCK(ps);
+ continue;
+ }
+			/* Handle the special clusters that came in while we let go of the lock */
+ if( ps->ps_special_clusters) {
+ dp_pages_free += ps->ps_special_clusters << ps->ps_clshift;
+ ps->ps_pgcount += ps->ps_special_clusters << ps->ps_clshift;
+ ps->ps_clcount += ps->ps_special_clusters;
+ if ( ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) {
+ ps_select_array[ps->ps_bs->bs_priority] = 0;
+ }
+ ps->ps_special_clusters = 0;
+ }
/* re-enable access to this segment */
- ps->ps_going_away = FALSE;
+ ps->ps_state &= ~PS_GOING_AWAY;
+ ps->ps_state |= PS_CAN_USE;
PS_UNLOCK(ps);
}
}
ps = paging_segments[i];
if (ps != PAGING_SEGMENT_NULL &&
ps->ps_bs == bs) {
- if(ps->ps_going_away) {
- paging_segments[i] = PAGING_SEGMENT_NULL;
- paging_segment_count--;
- PS_LOCK(ps);
- kfree((vm_offset_t)ps->ps_bmap,
- RMAPSIZE(ps->ps_ncls));
- kfree((vm_offset_t)ps, sizeof *ps);
+ if(IS_PS_GOING_AWAY(ps)) {
+ if(IS_PS_EMERGENCY_SEGMENT(ps)) {
+ PS_LOCK(ps);
+ ps->ps_state &= ~PS_GOING_AWAY;
+ ps->ps_special_clusters = 0;
+ ps->ps_pgcount = ps->ps_pgnum;
+ ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
+ dp_pages_reserve += ps->ps_pgcount;
+ PS_UNLOCK(ps);
+ } else {
+ paging_segments[i] = PAGING_SEGMENT_NULL;
+ paging_segment_count--;
+ PS_LOCK(ps);
+ kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+ kfree(ps, sizeof *ps);
+ }
}
}
}
PSL_UNLOCK();
+ if( dealing_with_emergency_segment ) {
+ BS_UNLOCK(bs);
+ return KERN_SUCCESS;
+ }
+
/*
* All the segments have been deleted.
* We can remove the backing store.
/*
* Disable lookups of this backing store.
*/
- if((void *)bs->bs_port->alias != NULL)
- kfree((vm_offset_t) bs->bs_port->alias,
- sizeof (struct vstruct_alias));
+ if((void *)bs->bs_port->ip_alias != NULL)
+ kfree((void *) bs->bs_port->ip_alias,
+ sizeof (struct vstruct_alias));
ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
bs->bs_port = MACH_PORT_NULL;
BS_UNLOCK(bs);
/*
* Free the backing store structure.
*/
- kfree((vm_offset_t)bs, sizeof *bs);
+ BS_LOCK_DESTROY(bs);
+ kfree(bs, sizeof *bs);
return KERN_SUCCESS;
}
+kern_return_t
+default_pager_backing_store_delete(
+ MACH_PORT_FACE backing_store)
+{
+ if( backing_store != emergency_segment_backing_store ) {
+ default_pager_backing_store_delete_internal(emergency_segment_backing_store);
+ }
+ return(default_pager_backing_store_delete_internal(backing_store));
+}
+
int ps_enter(paging_segment_t); /* forward */
int
PS_LOCK_INIT(ps);
ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
if (!ps->ps_bmap) {
- kfree((vm_offset_t)ps, sizeof *ps);
+ PS_LOCK_DESTROY(ps);
+ kfree(ps, sizeof *ps);
BS_UNLOCK(bs);
return KERN_RESOURCE_SHORTAGE;
}
clrbit(ps->ps_bmap, i);
}
- ps->ps_going_away = FALSE;
+ if(paging_segment_count == 0) {
+ ps->ps_state = PS_EMERGENCY_SEGMENT;
+ if(use_emergency_swap_file_first) {
+ ps->ps_state |= PS_CAN_USE;
+ }
+ } else {
+ ps->ps_state = PS_CAN_USE;
+ }
+
ps->ps_bs = bs;
if ((error = ps_enter(ps)) != 0) {
- kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
- kfree((vm_offset_t)ps, sizeof *ps);
+ kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+
+ PS_LOCK_DESTROY(ps);
+ kfree(ps, sizeof *ps);
BS_UNLOCK(bs);
return KERN_RESOURCE_SHORTAGE;
}
BS_UNLOCK(bs);
PSL_LOCK();
- dp_pages_free += ps->ps_pgcount;
+ if(IS_PS_OK_TO_USE(ps)) {
+ dp_pages_free += ps->ps_pgcount;
+ } else {
+ dp_pages_reserve += ps->ps_pgcount;
+ }
PSL_UNLOCK();
bs_more_space(ps->ps_clcount);
- DEBUG(DEBUG_BS_INTERNAL,
- ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
- device, offset, count, record_size,
- ps->ps_record_shift, ps->ps_pgnum));
+ DP_DEBUG(DEBUG_BS_INTERNAL,
+ ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
+ device, offset, count, record_size,
+ ps->ps_record_shift, ps->ps_pgnum));
return KERN_SUCCESS;
}
{
struct vs_async *vsa;
MACH_PORT_FACE reply_port;
- kern_return_t kr;
+// kern_return_t kr;
VS_ASYNC_LOCK();
if (vs_async_free_list == NULL) {
kalloc(sizeof (struct vstruct_alias));
if(alias_struct != NULL) {
alias_struct->vs = (struct vstruct *)vsa;
- alias_struct->name = ISVS;
- reply_port->alias = (int) alias_struct;
+ alias_struct->name = &default_pager_ops;
+ reply_port->ip_alias = (uintptr_t) alias_struct;
vsa->reply_port = reply_port;
vs_alloc_async_count++;
}
vs_alloc_async_failed++;
ipc_port_dealloc_kernel((MACH_PORT_FACE)
(reply_port));
- kfree((vm_offset_t)vsa,
- sizeof (struct vs_async));
+ kfree(vsa, sizeof (struct vs_async));
vsa = NULL;
}
}
kalloc(sizeof (struct vstruct_alias));
if(alias_struct != NULL) {
alias_struct->vs = reply_port;
- alias_struct->name = ISVS;
- reply_port->alias = (int) vsa;
+ alias_struct->name = &default_pager_ops;
+ reply_port->defpager_importance.alias = (int) vsa;
vsa->reply_port = reply_port;
vs_alloc_async_count++;
}
vs_alloc_async_failed++;
ipc_port_dealloc_kernel((MACH_PORT_FACE)
(reply_port));
- kfree((vm_offset_t) vsa,
- sizeof (struct vs_async));
+ kfree(vsa, sizeof (struct vs_async));
vsa = NULL;
}
}
kern_return_t kr;
reply_port = vsa->reply_port;
- kfree((vm_offset_t) reply_port->alias, sizeof (struct vstuct_alias));
- kfree((vm_offset_t) vsa, sizeof (struct vs_async));
+ kfree(reply_port->ip_alias, sizeof (struct vstuct_alias));
+ kfree(vsa, sizeof (struct vs_async));
ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
#if 0
VS_ASYNC_LOCK();
vstruct_t
ps_vstruct_create(
- vm_size_t size)
+ dp_size_t size)
{
vstruct_t vs;
- int i;
+ unsigned int i;
vs = (vstruct_t) zalloc(vstruct_zone);
if (vs == VSTRUCT_NULL) {
/*
* The following fields will be provided later.
*/
- vs->vs_mem_obj = NULL;
+ vs->vs_pager_ops = NULL;
vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
vs->vs_references = 1;
vs->vs_seqno = 0;
-#ifdef MACH_KERNEL
vs->vs_waiting_seqno = FALSE;
vs->vs_waiting_read = FALSE;
vs->vs_waiting_write = FALSE;
vs->vs_waiting_async = FALSE;
-#else
- mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO);
- mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD);
- mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE);
- mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS);
- mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC);
-#endif
vs->vs_readers = 0;
vs->vs_writers = 0;
vs->vs_errors = 0;
vs->vs_clshift = local_log2(bs_get_global_clsize(0));
- vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
+ vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
vs->vs_async_pending = 0;
/*
vs->vs_indirect = FALSE;
}
vs->vs_xfer_pending = FALSE;
- DEBUG(DEBUG_VS_INTERNAL,
- ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
+ DP_DEBUG(DEBUG_VS_INTERNAL,
+ ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
/*
* Check to see that we got the space.
*/
if (!vs->vs_dmap) {
- kfree((vm_offset_t)vs, sizeof *vs);
+ kfree(vs, sizeof *vs);
return VSTRUCT_NULL;
}
return vs;
}
-paging_segment_t ps_select_segment(int, int *); /* forward */
+paging_segment_t ps_select_segment(unsigned int, int *); /* forward */
paging_segment_t
ps_select_segment(
- int shift,
- int *psindex)
+ unsigned int shift,
+ int *psindex)
{
paging_segment_t ps;
int i;
PSL_LOCK();
if (paging_segment_count == 1) {
- paging_segment_t lps; /* used to avoid extra PS_UNLOCK */
+ paging_segment_t lps = PAGING_SEGMENT_NULL; /* used to avoid extra PS_UNLOCK */
ipc_port_t trigger = IP_NULL;
ps = paging_segments[paging_segment_max];
*psindex = paging_segment_max;
PS_LOCK(ps);
- if (ps->ps_going_away) {
- /* this segment is being turned off */
- lps = PAGING_SEGMENT_NULL;
- } else {
- ASSERT(ps->ps_clshift >= shift);
+ if( !IS_PS_EMERGENCY_SEGMENT(ps) ) {
+ panic("Emergency paging segment missing\n");
+ }
+ ASSERT(ps->ps_clshift >= shift);
+ if(IS_PS_OK_TO_USE(ps)) {
if (ps->ps_clcount) {
ps->ps_clcount--;
dp_pages_free -= 1 << ps->ps_clshift;
+ ps->ps_pgcount -= 1 << ps->ps_clshift;
if(min_pages_trigger_port &&
(dp_pages_free < minimum_pages_remaining)) {
trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
bs_low = TRUE;
+ backing_store_abort_compaction = TRUE;
}
lps = ps;
- } else
- lps = PAGING_SEGMENT_NULL;
- }
+ }
+ }
PS_UNLOCK(ps);
+
+ if( lps == PAGING_SEGMENT_NULL ) {
+ if(dp_pages_free) {
+ dp_pages_free_drift_count++;
+ if(dp_pages_free > dp_pages_free_drifted_max) {
+ dp_pages_free_drifted_max = dp_pages_free;
+ }
+ dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free));
+ }
+ dp_pages_free = 0;
+ }
+
PSL_UNLOCK();
if (trigger != IP_NULL) {
+ dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
default_pager_space_alert(trigger, HI_WAT_ALERT);
ipc_port_release_send(trigger);
}
}
if (paging_segment_count == 0) {
+ if(dp_pages_free) {
+ dp_pages_free_drift_count++;
+ if(dp_pages_free > dp_pages_free_drifted_max) {
+ dp_pages_free_drifted_max = dp_pages_free;
+ }
+ dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free));
+ }
+ dp_pages_free = 0;
PSL_UNLOCK();
return PAGING_SEGMENT_NULL;
}
* >= that of the vstruct.
*/
PS_LOCK(ps);
- if (ps->ps_going_away) {
- /* this segment is being turned off */
- } else if ((ps->ps_clcount) &&
- (ps->ps_clshift >= shift)) {
- ipc_port_t trigger = IP_NULL;
-
- ps->ps_clcount--;
- dp_pages_free -= 1 << ps->ps_clshift;
- if(min_pages_trigger_port &&
- (dp_pages_free <
- minimum_pages_remaining)) {
- trigger = min_pages_trigger_port;
- min_pages_trigger_port = NULL;
- }
- PS_UNLOCK(ps);
- /*
- * found one, quit looking.
- */
- ps_select_array[i] = j;
- PSL_UNLOCK();
-
- if (trigger != IP_NULL) {
- default_pager_space_alert(
- trigger,
- HI_WAT_ALERT);
- ipc_port_release_send(trigger);
+ if (IS_PS_OK_TO_USE(ps)) {
+ if ((ps->ps_clcount) &&
+ (ps->ps_clshift >= shift)) {
+ ipc_port_t trigger = IP_NULL;
+
+ ps->ps_clcount--;
+ dp_pages_free -= 1 << ps->ps_clshift;
+ ps->ps_pgcount -= 1 << ps->ps_clshift;
+ if(min_pages_trigger_port &&
+ (dp_pages_free <
+ minimum_pages_remaining)) {
+ trigger = min_pages_trigger_port;
+ min_pages_trigger_port = NULL;
+ bs_low = TRUE;
+ backing_store_abort_compaction = TRUE;
+ }
+ PS_UNLOCK(ps);
+ /*
+ * found one, quit looking.
+ */
+ ps_select_array[i] = j;
+ PSL_UNLOCK();
+
+ if (trigger != IP_NULL) {
+ dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
+ default_pager_space_alert(
+ trigger,
+ HI_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ }
+ *psindex = j;
+ return ps;
}
- *psindex = j;
- return ps;
}
PS_UNLOCK(ps);
}
j++;
}
}
+
+ if(dp_pages_free) {
+ dp_pages_free_drift_count++;
+ if(dp_pages_free > dp_pages_free_drifted_max) {
+ dp_pages_free_drifted_max = dp_pages_free;
+ }
+ dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count,dp_pages_free));
+ }
+ dp_pages_free = 0;
PSL_UNLOCK();
return PAGING_SEGMENT_NULL;
}
-vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
+dp_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
-vm_offset_t
+dp_offset_t
ps_allocate_cluster(
vstruct_t vs,
int *psindex,
paging_segment_t use_ps)
{
- int byte_num;
+ unsigned int byte_num;
int bit_num = 0;
paging_segment_t ps;
- vm_offset_t cluster;
+ dp_offset_t cluster;
ipc_port_t trigger = IP_NULL;
/*
* This and the ordering of the paging segment "going_away" bit setting
* protects us.
*/
+retry:
if (use_ps != PAGING_SEGMENT_NULL) {
ps = use_ps;
PSL_LOCK();
PS_LOCK(ps);
+
+ ASSERT(ps->ps_clcount != 0);
+
ps->ps_clcount--;
dp_pages_free -= 1 << ps->ps_clshift;
+ ps->ps_pgcount -= 1 << ps->ps_clshift;
if(min_pages_trigger_port &&
(dp_pages_free < minimum_pages_remaining)) {
trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
+ bs_low = TRUE;
+ backing_store_abort_compaction = TRUE;
}
PSL_UNLOCK();
PS_UNLOCK(ps);
if (trigger != IP_NULL) {
+ dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
default_pager_space_alert(trigger, HI_WAT_ALERT);
ipc_port_release_send(trigger);
}
} else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
PAGING_SEGMENT_NULL) {
-#if 0
- bs_no_paging_space(TRUE);
-#endif
-#if 0
- if (verbose)
-#endif
- dprintf(("no space in available paging segments; "
- "swapon suggested\n"));
- /* the count got off maybe, reset to zero */
+ static clock_sec_t lastnotify = 0;
+ clock_sec_t now;
+ clock_nsec_t nanoseconds_dummy;
+
+ /*
+ * Don't immediately jump to the emergency segment. Give the
+	 * dynamic pager a chance to create its first normal swap file.
+	 * Unless, of course, the very first normal swap file couldn't be
+	 * created due to a problem we didn't expect, i.e.
+	 * use_emergency_swap_file_first was never set to TRUE initially.
+ * It then gets set in the swap file creation error handling.
+ */
+ if(paging_segment_count > 1 || use_emergency_swap_file_first == TRUE) {
+
+ ps = paging_segments[EMERGENCY_PSEG_INDEX];
+ if(IS_PS_EMERGENCY_SEGMENT(ps) && !IS_PS_GOING_AWAY(ps)) {
+ PSL_LOCK();
+ PS_LOCK(ps);
+
+ if(IS_PS_GOING_AWAY(ps)) {
+ /* Someone de-activated the emergency paging segment*/
+ PS_UNLOCK(ps);
+ PSL_UNLOCK();
+
+ } else if(dp_pages_free) {
+ /*
+ * Someone has already activated the emergency paging segment
+ * OR
+				 * Between our receiving a NULL segment from ps_select_segment
+				 * and reaching here, a new normal segment could have been added.
+				 * E.g. we got a NULL segment and another thread just added a
+				 * new swap file. Hence check whether we have more dp_pages_free
+ * before activating the emergency segment.
+ */
+ PS_UNLOCK(ps);
+ PSL_UNLOCK();
+ goto retry;
+
+ } else if(!IS_PS_OK_TO_USE(ps) && ps->ps_clcount) {
+ /*
+ * PS_CAN_USE is only reset from the emergency segment when it's
+ * been successfully recovered. So it's legal to have an emergency
+				 * segment that has PS_CAN_USE but no clusters because its recovery
+ * failed.
+ */
+ backing_store_t bs = ps->ps_bs;
+ ps->ps_state |= PS_CAN_USE;
+ if(ps_select_array[bs->bs_priority] == BS_FULLPRI ||
+ ps_select_array[bs->bs_priority] == BS_NOPRI) {
+ ps_select_array[bs->bs_priority] = 0;
+ }
+ dp_pages_free += ps->ps_pgcount;
+ dp_pages_reserve -= ps->ps_pgcount;
+ PS_UNLOCK(ps);
+ PSL_UNLOCK();
+ dprintf(("Switching ON Emergency paging segment\n"));
+ goto retry;
+ }
+
+ PS_UNLOCK(ps);
+ PSL_UNLOCK();
+ }
+ }
+
+ /*
+ * Emit a notification of the low-paging resource condition
+ * but don't issue it more than once every five seconds. This
+ * prevents us from overflowing logs with thousands of
+ * repetitions of the message.
+ */
+ clock_get_system_nanotime(&now, &nanoseconds_dummy);
+ if (paging_segment_count > 1 && (now > lastnotify + 5)) {
+ /* With an activated emergency paging segment we still
+ * didn't get any clusters. This could mean that the
+ * emergency paging segment is exhausted.
+ */
+ dprintf(("System is out of paging space.\n"));
+ lastnotify = now;
+ }
+
PSL_LOCK();
- dp_pages_free = 0;
+
if(min_pages_trigger_port) {
trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
bs_low = TRUE;
+ backing_store_abort_compaction = TRUE;
}
PSL_UNLOCK();
if (trigger != IP_NULL) {
+ dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
default_pager_space_alert(trigger, HI_WAT_ALERT);
ipc_port_release_send(trigger);
}
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
- ASSERT(ps->ps_clcount != 0);
/*
* Look for an available cluster. At the end of the loop,
return cluster;
}
-void ps_deallocate_cluster(paging_segment_t, vm_offset_t); /* forward */
+void ps_deallocate_cluster(paging_segment_t, dp_offset_t); /* forward */
void
ps_deallocate_cluster(
paging_segment_t ps,
- vm_offset_t cluster)
+ dp_offset_t cluster)
{
- ipc_port_t trigger = IP_NULL;
- if (cluster >= (vm_offset_t) ps->ps_ncls)
+ if (cluster >= ps->ps_ncls)
panic("ps_deallocate_cluster: Invalid cluster number");
/*
PSL_LOCK();
PS_LOCK(ps);
clrbit(ps->ps_bmap, cluster);
- ++ps->ps_clcount;
- dp_pages_free += 1 << ps->ps_clshift;
- if(max_pages_trigger_port
- && (backing_store_release_trigger_disable == 0)
- && (dp_pages_free > maximum_pages_free)) {
- trigger = max_pages_trigger_port;
- max_pages_trigger_port = NULL;
+ if( IS_PS_OK_TO_USE(ps)) {
+ ++ps->ps_clcount;
+ ps->ps_pgcount += 1 << ps->ps_clshift;
+ dp_pages_free += 1 << ps->ps_clshift;
+ } else {
+ ps->ps_special_clusters += 1;
}
- PSL_UNLOCK();
/*
* Move the hint down to the freed cluster if it is
ps->ps_hint = (cluster/NBBY);
}
- PS_UNLOCK(ps);
/*
* If we're freeing space on a full priority, reset the array.
*/
- PSL_LOCK();
- if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
+ if ( IS_PS_OK_TO_USE(ps) && ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
ps_select_array[ps->ps_bs->bs_priority] = 0;
+ PS_UNLOCK(ps);
PSL_UNLOCK();
- if (trigger != IP_NULL) {
- VSL_LOCK();
- if(backing_store_release_trigger_disable != 0) {
- assert_wait((event_t)
- &backing_store_release_trigger_disable,
- THREAD_UNINT);
- VSL_UNLOCK();
- thread_block((void (*)(void)) 0);
- } else {
- VSL_UNLOCK();
- }
- default_pager_space_alert(trigger, LO_WAT_ALERT);
- ipc_port_release_send(trigger);
- }
-
return;
}
-void ps_dealloc_vsmap(struct vs_map *, vm_size_t); /* forward */
+void ps_dealloc_vsmap(struct vs_map *, dp_size_t); /* forward */
void
ps_dealloc_vsmap(
struct vs_map *vsmap,
- vm_size_t size)
+ dp_size_t size)
{
- int i;
- for (i = 0; i < size; i++)
- if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
+ unsigned int i;
+ struct ps_vnode_trim_data trim_data;
+
+ ps_vnode_trim_init(&trim_data);
+
+ for (i = 0; i < size; i++) {
+ if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) {
+ ps_vnode_trim_more(&trim_data,
+ &vsmap[i],
+ VSM_PS(vsmap[i])->ps_clshift,
+ vm_page_size << VSM_PS(vsmap[i])->ps_clshift);
ps_deallocate_cluster(VSM_PS(vsmap[i]),
VSM_CLOFF(vsmap[i]));
+ } else {
+ ps_vnode_trim_now(&trim_data);
+ }
+ }
+ ps_vnode_trim_now(&trim_data);
}
void
ps_vstruct_dealloc(
vstruct_t vs)
{
- int i;
- spl_t s;
+ unsigned int i;
+// spl_t s;
VS_MAP_LOCK(vs);
for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
if (vs->vs_imap[i] != NULL) {
ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
- kfree((vm_offset_t)vs->vs_imap[i],
- CLMAP_THRESHOLD);
+ kfree(vs->vs_imap[i], CLMAP_THRESHOLD);
}
}
- kfree((vm_offset_t)vs->vs_imap,
- INDIRECT_CLMAP_SIZE(vs->vs_size));
+ kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
} else {
/*
* Direct map. Free used clusters, then memory.
*/
ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
- kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
+ kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
}
VS_MAP_UNLOCK(vs);
bs_commit(- vs->vs_size);
- zfree(vstruct_zone, (vm_offset_t)vs);
+ VS_MAP_LOCK_DESTROY(vs);
+
+ zfree(vstruct_zone, vs);
+}
+
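+/*
+ * Editor's summary of the routine below: ps_vstruct_reclaim() walks the
+ * vstruct's direct or indirect cluster map and, when return_to_vm is TRUE,
+ * calls pvs_cluster_read() on every used cluster to pull its pages back
+ * into VM (synchronously when reclaim_backing_store is set, via
+ * fault_info.io_sync).  Map blocks whose entries have all been brought
+ * back are then released through ps_dealloc_vsmap().
+ */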
+kern_return_t
+ps_vstruct_reclaim(
+ vstruct_t vs,
+ boolean_t return_to_vm,
+ boolean_t reclaim_backing_store)
+{
+ unsigned int i, j;
+ struct vs_map *vsmap;
+ boolean_t vsmap_all_clear, vsimap_all_clear;
+ struct vm_object_fault_info fault_info;
+ int clmap_off;
+ unsigned int vsmap_size;
+ kern_return_t kr = KERN_SUCCESS;
+
+ VS_MAP_LOCK(vs);
+
+ fault_info.cluster_size = VM_SUPER_CLUSTER;
+ fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+ fault_info.user_tag = 0;
+ fault_info.lo_offset = 0;
+ fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift);
+ fault_info.io_sync = reclaim_backing_store;
+ fault_info.batch_pmap_op = FALSE;
+
+ /*
+ * If this is an indirect structure, then we walk through the valid
+ * (non-zero) indirect pointers and deallocate the clusters
+ * associated with each used map entry (via ps_dealloc_vsmap).
+ * When all of the clusters in an indirect block have been
+ * freed, we deallocate the block. When all of the indirect
+ * blocks have been deallocated we deallocate the memory
+ * holding the indirect pointers.
+ */
+ if (vs->vs_indirect) {
+ vsimap_all_clear = TRUE;
+ for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
+ vsmap = vs->vs_imap[i];
+ if (vsmap == NULL)
+ continue;
+ /* loop on clusters in this indirect map */
+ clmap_off = (vm_page_size * CLMAP_ENTRIES *
+ VSCLSIZE(vs) * i);
+ if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
+ vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
+ else
+ vsmap_size = CLMAP_ENTRIES;
+ vsmap_all_clear = TRUE;
+ if (return_to_vm) {
+ for (j = 0; j < vsmap_size;) {
+ if (VSM_ISCLR(vsmap[j]) ||
+ VSM_ISERR(vsmap[j])) {
+ j++;
+ clmap_off += vm_page_size * VSCLSIZE(vs);
+ continue;
+ }
+ VS_MAP_UNLOCK(vs);
+ kr = pvs_cluster_read(
+ vs,
+ clmap_off,
+ (dp_size_t) -1, /* read whole cluster */
+ &fault_info);
+
+ VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+ if (kr != KERN_SUCCESS) {
+ vsmap_all_clear = FALSE;
+ vsimap_all_clear = FALSE;
+
+ kr = KERN_MEMORY_ERROR;
+ goto out;
+ }
+ }
+ }
+ if (vsmap_all_clear) {
+ ps_dealloc_vsmap(vsmap, CLMAP_ENTRIES);
+ kfree(vsmap, CLMAP_THRESHOLD);
+ vs->vs_imap[i] = NULL;
+ }
+ }
+ if (vsimap_all_clear) {
+// kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
+ }
+ } else {
+ /*
+ * Direct map. Free used clusters, then memory.
+ */
+ vsmap = vs->vs_dmap;
+ if (vsmap == NULL) {
+ goto out;
+ }
+ vsmap_all_clear = TRUE;
+ /* loop on clusters in the direct map */
+ if (return_to_vm) {
+ for (j = 0; j < vs->vs_size;) {
+ if (VSM_ISCLR(vsmap[j]) ||
+ VSM_ISERR(vsmap[j])) {
+ j++;
+ continue;
+ }
+ clmap_off = vm_page_size * (j << vs->vs_clshift);
+ VS_MAP_UNLOCK(vs);
+ kr = pvs_cluster_read(
+ vs,
+ clmap_off,
+ (dp_size_t) -1, /* read whole cluster */
+ &fault_info);
+
+ VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+ if (kr != KERN_SUCCESS) {
+ vsmap_all_clear = FALSE;
+
+ kr = KERN_MEMORY_ERROR;
+ goto out;
+ } else {
+// VSM_CLR(vsmap[j]);
+ }
+ }
+ }
+ if (vsmap_all_clear) {
+ ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
+// kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
+ }
+ }
+out:
+ VS_MAP_UNLOCK(vs);
+
+ return kr;
}
-int ps_map_extend(vstruct_t, int); /* forward */
+int ps_map_extend(vstruct_t, unsigned int); /* forward */
int ps_map_extend(
vstruct_t vs,
- int new_size)
+ unsigned int new_size)
{
struct vs_map **new_imap;
struct vs_map *new_dmap = NULL;
/* Allocate an indirect page */
if ((new_imap[0] = (struct vs_map *)
kalloc(CLMAP_THRESHOLD)) == NULL) {
- kfree((vm_offset_t)new_imap, new_map_size);
+ kfree(new_imap, new_map_size);
return -1;
}
new_dmap = new_imap[0];
bs_commit(new_size - vs->vs_size);
vs->vs_size = new_size;
if (old_map)
- kfree((vm_offset_t)old_map, old_map_size);
+ kfree(old_map, old_map_size);
return 0;
}
-vm_offset_t
+dp_offset_t
ps_clmap(
vstruct_t vs,
- vm_offset_t offset,
+ dp_offset_t offset,
struct clmap *clmap,
int flag,
- vm_size_t size,
+ dp_size_t size,
int error)
{
- vm_offset_t cluster; /* The cluster of offset. */
- vm_offset_t newcl; /* The new cluster allocated. */
- vm_offset_t newoff;
- int i;
+ dp_offset_t cluster; /* The cluster of offset. */
+ dp_offset_t newcl; /* The new cluster allocated. */
+ dp_offset_t newoff;
+ unsigned int i;
struct vs_map *vsmap;
VS_MAP_LOCK(vs);
ASSERT(vs->vs_dmap);
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
/*
* Initialize cluster error value
if (flag == CL_FIND) {
/* Do not allocate if just doing a lookup */
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
if (ps_map_extend(vs, cluster + 1)) {
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
}
if (vsmap == NULL) {
if (flag == CL_FIND) {
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
/* Allocate the indirect block */
vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
if (vsmap == NULL) {
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
/* Initialize the cluster offsets */
for (i = 0; i < CLMAP_ENTRIES; i++)
if (VSM_ISERR(*vsmap)) {
clmap->cl_error = VSM_GETERR(*vsmap);
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
} else if (VSM_ISCLR(*vsmap)) {
int psindex;
VSM_SETERR(*vsmap, error);
}
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
} else {
/*
* Attempt to allocate a cluster from the paging segment
*/
newcl = ps_allocate_cluster(vs, &psindex,
PAGING_SEGMENT_NULL);
- if (newcl == -1) {
+ if (newcl == (dp_offset_t) -1) {
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) -1;
+ return (dp_offset_t) -1;
}
VSM_CLR(*vsmap);
VSM_SETCLOFF(*vsmap, newcl);
* relatively quick.
*/
ASSERT(trunc_page(offset) == offset);
- newcl = ptoa(newcl) << vs->vs_clshift;
+ newcl = ptoa_32(newcl) << vs->vs_clshift;
newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
if (flag == CL_ALLOC) {
/*
* set bits in the allocation bitmap according to which
* pages were requested. size is in bytes.
*/
- i = atop(newoff);
+ i = atop_32(newoff);
while ((size > 0) && (i < VSCLSIZE(vs))) {
VSM_SETALLOC(*vsmap, i);
i++;
* Offset is not cluster aligned, so number of pages
* and bitmaps must be adjusted
*/
- clmap->cl_numpages -= atop(newoff);
+ clmap->cl_numpages -= atop_32(newoff);
CLMAP_SHIFT(clmap, vs);
CLMAP_SHIFTALLOC(clmap, vs);
}
* entire cluster is in error.
*/
if (size && flag == CL_FIND) {
- vm_offset_t off = (vm_offset_t) 0;
+ dp_offset_t off = (dp_offset_t) 0;
if (!error) {
for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
} else {
BS_STAT(clmap->cl_ps->ps_bs,
clmap->cl_ps->ps_bs->bs_pages_out_fail +=
- atop(size));
+ atop_32(size));
off = VSM_CLOFF(*vsmap);
VSM_SETERR(*vsmap, error);
}
* Deallocate cluster if error, and no valid pages
* already present.
*/
- if (off != (vm_offset_t) 0)
+ if (off != (dp_offset_t) 0)
ps_deallocate_cluster(clmap->cl_ps, off);
VS_MAP_UNLOCK(vs);
- return (vm_offset_t) 0;
+ return (dp_offset_t) 0;
} else
VS_MAP_UNLOCK(vs);
- DEBUG(DEBUG_VS_INTERNAL,
- ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
- newcl+newoff, (int) vs, (int) vsmap, flag));
- DEBUG(DEBUG_VS_INTERNAL,
- (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
- (int) clmap->cl_ps, clmap->cl_numpages,
- (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
+ DP_DEBUG(DEBUG_VS_INTERNAL,
+ ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
+ newcl+newoff, (int) vs, (int) vsmap, flag));
+ DP_DEBUG(DEBUG_VS_INTERNAL,
+ (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
+ (int) clmap->cl_ps, clmap->cl_numpages,
+ (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
return (newcl + newoff);
}
-void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t); /* forward */
+void ps_clunmap(vstruct_t, dp_offset_t, dp_size_t); /* forward */
void
ps_clunmap(
vstruct_t vs,
- vm_offset_t offset,
- vm_size_t length)
+ dp_offset_t offset,
+ dp_size_t length)
{
- vm_offset_t cluster; /* The cluster number of offset */
+ dp_offset_t cluster; /* The cluster number of offset */
struct vs_map *vsmap;
+ struct ps_vnode_trim_data trim_data;
+
+ ps_vnode_trim_init(&trim_data);
VS_MAP_LOCK(vs);
* clusters and map entries as encountered.
*/
while (length > 0) {
- vm_offset_t newoff;
- int i;
+ dp_offset_t newoff;
+ unsigned int i;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) /* indirect map */
vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
else
vsmap = vs->vs_dmap;
if (vsmap == NULL) {
+ ps_vnode_trim_now(&trim_data);
VS_MAP_UNLOCK(vs);
return;
}
vsmap += cluster%CLMAP_ENTRIES;
if (VSM_ISCLR(*vsmap)) {
+ ps_vnode_trim_now(&trim_data);
length -= vm_page_size;
offset += vm_page_size;
continue;
* paging segment cluster pages.
* Optimize for entire cluster cleraing.
*/
- if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) {
+ if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) {
/*
* Not cluster aligned.
*/
ASSERT(trunc_page(newoff) == newoff);
- i = atop(newoff);
+ i = atop_32(newoff);
} else
i = 0;
while ((i < VSCLSIZE(vs)) && (length > 0)) {
/*
* If map entry is empty, clear and deallocate cluster.
*/
- if (!VSM_ALLOC(*vsmap)) {
+ if (!VSM_BMAP(*vsmap)) {
+ ps_vnode_trim_more(&trim_data,
+ vsmap,
+ vs->vs_clshift,
+ VSCLSIZE(vs) * vm_page_size);
ps_deallocate_cluster(VSM_PS(*vsmap),
VSM_CLOFF(*vsmap));
VSM_CLR(*vsmap);
+ } else {
+ ps_vnode_trim_now(&trim_data);
}
}
+ ps_vnode_trim_now(&trim_data);
VS_MAP_UNLOCK(vs);
}
-void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */
+void ps_vs_write_complete(vstruct_t, dp_offset_t, dp_size_t, int); /* forward */
void
ps_vs_write_complete(
vstruct_t vs,
- vm_offset_t offset,
- vm_size_t size,
+ dp_offset_t offset,
+ dp_size_t size,
int error)
{
struct clmap clmap;
(void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
}
-void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int); /* forward */
+void vs_cl_write_complete(vstruct_t, paging_segment_t, dp_offset_t, vm_offset_t, dp_size_t, boolean_t, int); /* forward */
void
vs_cl_write_complete(
- vstruct_t vs,
- paging_segment_t ps,
- vm_offset_t offset,
- vm_offset_t addr,
- vm_size_t size,
- boolean_t async,
- int error)
+ vstruct_t vs,
+ __unused paging_segment_t ps,
+ dp_offset_t offset,
+ __unused vm_offset_t addr,
+ dp_size_t size,
+ boolean_t async,
+ int error)
{
- kern_return_t kr;
+// kern_return_t kr;
if (error) {
/*
dprintf(("write failed error = 0x%x\n", error));
/* add upl_abort code here */
} else
- GSTAT(global_stats.gs_pages_out += atop(size));
+ GSTAT(global_stats.gs_pages_out += atop_32(size));
/*
* Notify the vstruct mapping code, so it can do its accounting.
*/
if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
vs->vs_waiting_async = FALSE;
VS_UNLOCK(vs);
- /* mutex_unlock(&vs->vs_waiting_async); */
thread_wakeup(&vs->vs_async_pending);
} else {
VS_UNLOCK(vs);
struct vs_async *vsa;
vsa = (struct vs_async *)
- ((struct vstruct_alias *)(reply_port->alias))->vs;
+ ((struct vstruct_alias *)(reply_port->ip_alias))->vs;
if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
device_code = KERN_FAILURE;
if(vsa->vsa_error) {
/* need to consider error condition. re-write data or */
/* throw it away here. */
- vm_offset_t ioaddr;
- if(vm_map_copyout(kernel_map, &ioaddr,
- (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS)
- panic("vs_cluster_write: unable to copy source list\n");
- vm_deallocate(kernel_map, ioaddr, vsa->vsa_size);
+ vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr);
}
ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
vsa->vsa_size, vsa->vsa_error);
{
struct vs_async *vsa;
vsa = (struct vs_async *)
- ((struct vstruct_alias *)(reply_port->alias))->vs;
+ ((struct vstruct_alias *)(reply_port->defpager_importance.alias))->vs;
vsa->vsa_addr = (vm_offset_t)data;
vsa->vsa_size = (vm_size_t)dataCnt;
vsa->vsa_error = return_code;
- thread_wakeup(&vsa->vsa_lock);
+ thread_wakeup(&vsa);
return KERN_SUCCESS;
}
return KERN_SUCCESS;
}
-kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
-
kern_return_t
ps_read_device(
paging_segment_t ps,
- vm_offset_t offset,
+ dp_offset_t offset,
vm_offset_t *bufferp,
unsigned int size,
unsigned int *residualp,
vm_offset_t buf_ptr;
unsigned int records_read;
struct vs_async *vsa;
- mutex_t vs_waiting_read_reply;
device_t device;
vm_map_copy_t device_data = NULL;
default_pager_thread_t *dpt = NULL;
device = dev_port_lookup(ps->ps_device);
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
vsa->vsa_size = 0;
vsa->vsa_ps = NULL;
}
- mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO);
ip_lock(vsa->reply_port);
vsa->reply_port->ip_sorights++;
ip_reference(vsa->reply_port);
(io_buf_ptr_t *) &dev_buffer,
(mach_msg_type_number_t *) &bytes_read);
if(kr == MIG_NO_REPLY) {
- assert_wait(&vsa->vsa_lock, THREAD_UNINT);
- thread_block((void(*)(void))0);
+ assert_wait(&vsa, THREAD_UNINT);
+ thread_block(THREAD_CONTINUE_NULL);
dev_buffer = vsa->vsa_addr;
bytes_read = (unsigned int)vsa->vsa_size;
records_read = (bytes_read >>
(vm_page_shift - ps->ps_record_shift));
dev_offset += records_read;
- DEBUG(DEBUG_VS_INTERNAL,
- ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
- dev_buffer, bytes_read));
+ DP_DEBUG(DEBUG_VS_INTERNAL,
+ ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
+ dev_buffer, bytes_read));
if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
!= KERN_SUCCESS)
Panic("dealloc buf");
*residualp = size - total_read;
if((dev_buffer != *bufferp) && (total_read != 0)) {
vm_offset_t temp_buffer;
- vm_allocate(kernel_map, &temp_buffer, total_read, TRUE);
+ vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
VM_MAP_COPYIN_OPT_SRC_DESTROY |
return KERN_SUCCESS;
}
-kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */
-
kern_return_t
ps_write_device(
paging_segment_t ps,
- vm_offset_t offset,
+ dp_offset_t offset,
vm_offset_t addr,
unsigned int size,
struct vs_async *vsa)
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
"device_write_request returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
/* do the completion notification to free resources */
device_write_reply(reply_port, kr, 0);
return PAGER_ERROR;
"device_write returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
return PAGER_ERROR;
}
if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
kern_return_t
ps_read_device(
- paging_segment_t ps,
- vm_offset_t offset,
- vm_offset_t *bufferp,
- unsigned int size,
- unsigned int *residualp,
- int flags)
+ __unused paging_segment_t ps,
+ __unused dp_offset_t offset,
+ __unused vm_offset_t *bufferp,
+ __unused unsigned int size,
+ __unused unsigned int *residualp,
+ __unused int flags)
{
panic("ps_read_device not supported");
+ return KERN_FAILURE;
}
+kern_return_t
ps_write_device(
- paging_segment_t ps,
- vm_offset_t offset,
- vm_offset_t addr,
- unsigned int size,
- struct vs_async *vsa)
+ __unused paging_segment_t ps,
+ __unused dp_offset_t offset,
+ __unused vm_offset_t addr,
+ __unused unsigned int size,
+ __unused struct vs_async *vsa)
{
panic("ps_write_device not supported");
+ return KERN_FAILURE;
}
#endif /* DEVICE_PAGING */
-void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t); /* forward */
+void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */
void
pvs_object_data_provided(
- vstruct_t vs,
- upl_t upl,
- vm_offset_t offset,
- vm_size_t size)
+ __unused vstruct_t vs,
+ __unused upl_t upl,
+ __unused upl_offset_t offset,
+ upl_size_t size)
{
+#if RECLAIM_SWAP
+ boolean_t empty;
+#endif
- DEBUG(DEBUG_VS_INTERNAL,
- ("buffer=0x%x,offset=0x%x,size=0x%x\n",
- upl, offset, size));
+ DP_DEBUG(DEBUG_VS_INTERNAL,
+ ("buffer=0x%x,offset=0x%x,size=0x%x\n",
+ upl, offset, size));
ASSERT(size > 0);
- GSTAT(global_stats.gs_pages_in += atop(size));
-
-
-#if USE_PRECIOUS
- ps_clunmap(vs, offset, size);
-#endif /* USE_PRECIOUS */
+ GSTAT(global_stats.gs_pages_in += atop_32(size));
+
+/* check upl iosync flag instead of using RECLAIM_SWAP */
+#if RECLAIM_SWAP
+ if (size != upl->size) {
+ if (size) {
+ ps_clunmap(vs, offset, size);
+ upl_commit_range(upl, 0, size, 0, NULL, 0, &empty);
+ }
+ upl_abort(upl, UPL_ABORT_ERROR);
+ upl_deallocate(upl);
+ } else {
+ ps_clunmap(vs, offset, size);
+ upl_commit(upl, NULL, 0);
+ upl_deallocate(upl);
+ }
+#endif /* RECLAIM_SWAP */
}
+static memory_object_offset_t last_start;
+static vm_size_t last_length;
+
+/*
+ * A "cnt" of 0 means that the caller just wants to check if the page at
+ * offset "vs_offset" exists in the backing store. That page hasn't been
+ * prepared, so no need to release it.
+ *
+ * A "cnt" of -1 means that the caller wants to bring back from the backing
+ * store all existing pages in the cluster containing "vs_offset".
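+ *
+ * Illustrative caller sketch (editor's example; "vs", "offset" and "fi"
+ * stand for a valid vstruct, a page-aligned backing-store offset and a
+ * populated struct vm_object_fault_info):
+ *
+ *	if (pvs_cluster_read(vs, offset, 0, NULL) != KERN_SUCCESS)
+ *		... page is absent from backing store: caller zero-fills ...
+ *
+ *	kr = pvs_cluster_read(vs, offset, (dp_size_t) -1, (void *) &fi);
+ *		... pulls every resident page of that cluster back into VM ...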
+ */
kern_return_t
pvs_cluster_read(
vstruct_t vs,
- vm_offset_t vs_offset,
- vm_size_t cnt)
+ dp_offset_t vs_offset,
+ dp_size_t cnt,
+ void *fault_info)
{
- upl_t upl;
kern_return_t error = KERN_SUCCESS;
- int size;
+ unsigned int size;
unsigned int residual;
unsigned int request_flags;
- int seg_index;
- int pages_in_cl;
+ int io_flags = 0;
+ int seg_index;
+ int pages_in_cl;
int cl_size;
int cl_mask;
- int cl_index;
- int xfer_size;
- vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
- paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+ int cl_index;
+ unsigned int xfer_size;
+ dp_offset_t orig_vs_offset;
+ dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
+ paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
struct clmap clmap;
+ upl_t upl;
+ unsigned int page_list_count;
+ memory_object_offset_t cluster_start;
+ vm_size_t cluster_length;
+ uint32_t io_streaming;
+ int i;
+ boolean_t io_sync = FALSE;
+ boolean_t reclaim_all = FALSE;
pages_in_cl = 1 << vs->vs_clshift;
cl_size = pages_in_cl * vm_page_size;
cl_mask = cl_size - 1;
+ request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
+
+ if (cnt == (dp_size_t) -1)
+ reclaim_all = TRUE;
+
+ if (reclaim_all == TRUE) {
+ /*
+ * We've been called from ps_vstruct_reclaim() to move all
+ * the object's swapped pages back to VM pages.
+ * This can put memory pressure on the system, so we do want
+ * to wait for free pages, to avoid getting in the way of the
+ * vm_pageout_scan() thread.
+ * Let's not use UPL_NOBLOCK in this case.
+ */
+ vs_offset &= ~cl_mask;
+ i = pages_in_cl;
+ } else {
+ i = 1;
+
+ /*
+ * if the I/O cluster size == PAGE_SIZE, we don't want to set
+ * the UPL_NOBLOCK since we may be trying to recover from a
+ * previous partial pagein I/O that occurred because we were low
+ * on memory and bailed early in order to honor the UPL_NOBLOCK...
+ * since we're only asking for a single page, we can block w/o fear
+ * of tying up pages while waiting for more to become available
+ */
+ if (fault_info == NULL || ((vm_object_fault_info_t)fault_info)->cluster_size > PAGE_SIZE)
+ request_flags |= UPL_NOBLOCK;
+ }
+
+again:
+ cl_index = (vs_offset & cl_mask) / vm_page_size;
+
+ if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) ||
+ !CLMAP_ISSET(clmap, cl_index)) {
+ /*
+ * the needed page doesn't exist in the backing store...
+ * we don't want to try to do any I/O, just abort the
+ * page and let the fault handler provide a zero-fill
+ */
+ if (cnt == 0) {
+ /*
+ * The caller was just poking at us to see if
+ * the page has been paged out. No need to
+ * mess with the page at all.
+ * Just let the caller know we don't have that page.
+ */
+ return KERN_FAILURE;
+ }
+ if (reclaim_all == TRUE) {
+ i--;
+ if (i == 0) {
+ /* no more pages in this cluster */
+ return KERN_FAILURE;
+ }
+ /* try the next page in this cluster */
+ vs_offset += vm_page_size;
+ goto again;
+ }
+
+ page_list_count = 0;
+
+ memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
+ PAGE_SIZE, PAGE_SIZE,
+ &upl, NULL, &page_list_count,
+ request_flags | UPL_SET_INTERNAL);
+ upl_range_needed(upl, 0, 1);
+
+ if (clmap.cl_error)
+ upl_abort(upl, UPL_ABORT_ERROR);
+ else
+ upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+ upl_deallocate(upl);
+
+ return KERN_SUCCESS;
+ }
+
+ if (cnt == 0) {
+ /*
+ * The caller was just poking at us to see if
+ * the page has been paged out. No need to
+ * mess with the page at all.
+ * Just let the caller know we do have that page.
+ */
+ return KERN_SUCCESS;
+ }
+
+ if(((vm_object_fault_info_t)fault_info)->io_sync == TRUE ) {
+ io_sync = TRUE;
+ } else {
+#if RECLAIM_SWAP
+ io_sync = TRUE;
+#endif /* RECLAIM_SWAP */
+ }
+
+ if( io_sync == TRUE ) {
+
+ io_flags |= UPL_IOSYNC | UPL_NOCOMMIT;
+#if USE_PRECIOUS
+ request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE;
+#else /* USE_PRECIOUS */
+ request_flags |= UPL_REQUEST_SET_DIRTY;
+#endif /* USE_PRECIOUS */
+ }
+
+ assert(dp_encryption_inited);
+ if (dp_encryption) {
+ /*
+ * ENCRYPTED SWAP:
+ * request that the UPL be prepared for
+ * decryption.
+ */
+ request_flags |= UPL_ENCRYPT;
+ io_flags |= UPL_PAGING_ENCRYPTED;
+ }
+ orig_vs_offset = vs_offset;
+
+ assert(cnt != 0);
+ cnt = VM_SUPER_CLUSTER;
+ cluster_start = (memory_object_offset_t) vs_offset;
+ cluster_length = (vm_size_t) cnt;
+ io_streaming = 0;
+
+ /*
+ * determine how big a speculative I/O we should try for...
+ */
+ if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) {
+ assert(vs_offset >= (dp_offset_t) cluster_start &&
+ vs_offset < (dp_offset_t) (cluster_start + cluster_length));
+ vs_offset = (dp_offset_t) cluster_start;
+ cnt = (dp_size_t) cluster_length;
+ } else {
+ cluster_length = PAGE_SIZE;
+ cnt = PAGE_SIZE;
+ }
+
+ if (io_streaming)
+ io_flags |= UPL_IOSTREAMING;
+
+ last_start = cluster_start;
+ last_length = cluster_length;
+
/*
* This loop will be executed multiple times until the entire
- * request has been satisfied... if the request spans cluster
+ * range has been looked at or we issue an I/O... if the request spans cluster
	 * boundaries, the clusters will be checked for logical continuity,
- * if contiguous the I/O request will span multiple clusters, otherwise
- * it will be broken up into the minimal set of I/O's
- *
- * If there are holes in a request (either unallocated pages in a paging
- * segment or an unallocated paging segment), we stop
- * reading at the hole, inform the VM of any data read, inform
- * the VM of an unavailable range, then loop again, hoping to
- * find valid pages later in the requested range. This continues until
- * the entire range has been examined, and read, if present.
+ * if contiguous the I/O request will span multiple clusters...
+ * at most only 1 I/O will be issued... it will encompass the original offset
*/
-
-#if USE_PRECIOUS
- request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS;
-#else
- request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE ;
-#endif
- while (cnt && (error == KERN_SUCCESS)) {
+ while (cnt && error == KERN_SUCCESS) {
int ps_info_valid;
- int page_list_count;
- if (cnt > VM_SUPER_CLUSTER)
+ if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
+ size = VM_SUPER_CLUSTER;
+ size -= vs_offset & cl_mask;
+ } else if (cnt > VM_SUPER_CLUSTER)
size = VM_SUPER_CLUSTER;
else
size = cnt;
+
cnt -= size;
ps_info_valid = 0;
seg_index = 0;
while (size > 0 && error == KERN_SUCCESS) {
- int abort_size;
+ unsigned int abort_size;
+ unsigned int lsize;
int failed_size;
int beg_pseg;
int beg_indx;
- vm_offset_t cur_offset;
-
+ dp_offset_t cur_offset;
if ( !ps_info_valid) {
ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
/*
* skip over unallocated physical segments
*/
- if (ps_offset[seg_index] == (vm_offset_t) -1) {
+ if (ps_offset[seg_index] == (dp_offset_t) -1) {
abort_size = cl_size - (vs_offset & cl_mask);
abort_size = MIN(abort_size, size);
- page_list_count = 0;
- memory_object_super_upl_request(
- vs->vs_control,
- (memory_object_offset_t)vs_offset,
- abort_size, abort_size,
- &upl, NULL, &page_list_count,
- request_flags);
-
- if (clmap.cl_error) {
- upl_abort(upl, UPL_ABORT_ERROR);
- } else {
- upl_abort(upl, UPL_ABORT_UNAVAILABLE);
- }
- upl_deallocate(upl);
-
- size -= abort_size;
- vs_offset += abort_size;
+ size -= abort_size;
+ vs_offset += abort_size;
seg_index++;
ps_info_valid = 0;
+
continue;
}
cl_index = (vs_offset & cl_mask) / vm_page_size;
abort_size += vm_page_size;
}
if (abort_size) {
- /*
- * Let VM system know about holes in clusters.
- */
- GSTAT(global_stats.gs_pages_unavail += atop(abort_size));
-
- page_list_count = 0;
- memory_object_super_upl_request(
- vs->vs_control,
- (memory_object_offset_t)vs_offset,
- abort_size, abort_size,
- &upl, NULL, &page_list_count,
- request_flags);
-
- upl_abort(upl, UPL_ABORT_UNAVAILABLE);
- upl_deallocate(upl);
-
- size -= abort_size;
- vs_offset += abort_size;
+ size -= abort_size;
+ vs_offset += abort_size;
if (cl_index == pages_in_cl) {
/*
*/
seg_index++;
ps_info_valid = 0;
+
continue;
}
if (size == 0)
while (cl_index < pages_in_cl && xfer_size < size) {
/*
- * accumulate allocated pages within a physical segment
+ * accumulate allocated pages within
+ * a physical segment
*/
if (CLMAP_ISSET(clmap, cl_index)) {
xfer_size += vm_page_size;
}
if (cl_index < pages_in_cl || xfer_size >= size) {
/*
- * we've hit an unallocated page or the
- * end of this request... go fire the I/O
+ * we've hit an unallocated page or
+ * the end of this request... see if
+ * it's time to fire the I/O
*/
break;
}
/*
- * we've hit the end of the current physical segment
- * and there's more to do, so try moving to the next one
+ * we've hit the end of the current physical
+ * segment and there's more to do, so try
+ * moving to the next one
*/
seg_index++;
ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
- psp[seg_index] = CLMAP_PS(clmap);
+ psp[seg_index] = CLMAP_PS(clmap);
ps_info_valid = 1;
if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
/*
- * if the physical segment we're about to step into
- * is not contiguous to the one we're currently
- * in, or it's in a different paging file, or
+ * if the physical segment we're about
+ * to step into is not contiguous to
+ * the one we're currently in, or it's
+ * in a different paging file, or
* it hasn't been allocated....
- * we stop here and generate the I/O
+ * we stop this run and go check
+ * to see if it's time to fire the I/O
*/
break;
}
/*
- * start with first page of the next physical segment
+ * start with first page of the next physical
+ * segment
*/
cl_index = 0;
}
- if (xfer_size) {
+ if (xfer_size == 0) {
/*
- * we have a contiguous range of allocated pages
- * to read from
+ * no I/O to generate for this segment
*/
- page_list_count = 0;
- memory_object_super_upl_request(vs->vs_control,
- (memory_object_offset_t)vs_offset,
- xfer_size, xfer_size,
- &upl, NULL, &page_list_count,
- request_flags | UPL_SET_INTERNAL);
-
- error = ps_read_file(psp[beg_pseg], upl, (vm_offset_t) 0,
- ps_offset[beg_pseg] + (beg_indx * vm_page_size), xfer_size, &residual, 0);
- } else
continue;
+ }
+ if (cur_offset <= orig_vs_offset) {
+ /*
+ * we've hit a hole in our speculative cluster
+ * before the offset that we're really after...
+ * don't issue the I/O since it doesn't encompass
+			 * the original offset and we only want to pull in
+			 * the speculative pages if they can be made part
+			 * of a single I/O
+ */
+ size -= xfer_size;
+ vs_offset += xfer_size;
+
+ continue;
+ }
+ /*
+ * we have a contiguous range of allocated pages
+ * to read from that encompasses the original offset
+ */
+ page_list_count = 0;
+ memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
+ xfer_size, xfer_size,
+ &upl, NULL, &page_list_count,
+ request_flags | UPL_SET_INTERNAL);
+
+ error = ps_read_file(psp[beg_pseg],
+ upl, (upl_offset_t) 0,
+ ps_offset[beg_pseg] + (beg_indx * vm_page_size),
+ xfer_size, &residual, io_flags);
- failed_size = 0;
/*
- * Adjust counts and send response to VM. Optimize for the
- * common case, i.e. no error and/or partial data.
- * If there was an error, then we need to error the entire
- * range, even if some data was successfully read.
- * If there was a partial read we may supply some
+ * Adjust counts and send response to VM. Optimize
+ * for the common case, i.e. no error and/or partial
+ * data. If there was an error, then we need to error
+ * the entire range, even if some data was successfully
+ * read. If there was a partial read we may supply some
* data and may error some as well. In all cases the
- * VM must receive some notification for every page in the
- * range.
+ * VM must receive some notification for every page
+ * in the range.
*/
if ((error == KERN_SUCCESS) && (residual == 0)) {
/*
- * Got everything we asked for, supply the data to
- * the VM. Note that as a side effect of supplying
- * the data, the buffer holding the supplied data is
- * deallocated from the pager's address space.
+ * Got everything we asked for, supply the data
+ * to the VM. Note that as a side effect of
+ * supplying the data, the buffer holding the
+ * supplied data is deallocated from the pager's
+ * address space.
*/
- pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
+ lsize = xfer_size;
+ failed_size = 0;
} else {
+ lsize = 0;
failed_size = xfer_size;
if (error == KERN_SUCCESS) {
* and no data moved, we turn it into
* an error, assuming we're reading at
				         * or beyond EOF.
- * Fall through and error the entire
- * range.
+ * Fall through and error the entire range.
*/
error = KERN_FAILURE;
} else {
* Fall through and error the remainder
* of the range, if any.
*/
- int fill, lsize;
+ int fill;
- fill = residual & ~vm_page_size;
+ fill = residual & (vm_page_size - 1);
lsize = (xfer_size - residual) + fill;
- pvs_object_data_provided(vs, upl, vs_offset, lsize);
- if (lsize < xfer_size) {
+ if (lsize < xfer_size)
failed_size = xfer_size - lsize;
+
+ if (reclaim_all == FALSE)
error = KERN_FAILURE;
- }
}
}
}
- /*
- * If there was an error in any part of the range, tell
- * the VM. Note that error is explicitly checked again since
- * it can be modified above.
- */
- if (error != KERN_SUCCESS) {
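+		/*
+		 * hand the VM whatever portion of the request was
+		 * successfully read (lsize may be zero)
+		 */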
+ pvs_object_data_provided(vs, upl, vs_offset, lsize);
+
+ if (failed_size) {
+ /*
+ * There was an error in some part of the range, tell
+ * the VM. Note that error is explicitly checked again
+ * since it can be modified above.
+ */
BS_STAT(psp[beg_pseg]->ps_bs,
- psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop(failed_size));
+ psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
}
- size -= xfer_size;
- vs_offset += xfer_size;
+ /*
+	 * we've issued a single I/O that encompassed the original offset...
+	 * at this point we either met our speculative request length or
+	 * we ran into a 'hole' (i.e. a page not present in the cluster, or a cluster
+	 * not present or not physically contiguous to the previous one), so
+	 * we're done issuing I/O
+ */
+ return (error);
}
-
- } /* END while (cnt && (error == 0)) */
+ }
return error;
}
vs_cluster_write(
vstruct_t vs,
upl_t internal_upl,
- vm_offset_t offset,
- vm_size_t cnt,
+ upl_offset_t offset,
+ upl_size_t cnt,
boolean_t dp_internal,
int flags)
{
- vm_offset_t size;
- vm_offset_t transfer_size;
+ upl_size_t transfer_size;
int error = 0;
struct clmap clmap;
- vm_offset_t actual_offset; /* Offset within paging segment */
+ dp_offset_t actual_offset; /* Offset within paging segment */
paging_segment_t ps;
- vm_offset_t subx_size;
- vm_offset_t mobj_base_addr;
- vm_offset_t mobj_target_addr;
- int mobj_size;
-
- struct vs_async *vsa;
- vm_map_copy_t copy;
+ dp_offset_t mobj_base_addr;
+ dp_offset_t mobj_target_addr;
upl_t upl;
upl_page_info_t *pl;
int page_index;
+ unsigned int page_max_index;
int list_size;
- int cl_size;
+ int pages_in_cl;
+ unsigned int cl_size;
+ int base_index;
+ unsigned int seg_size;
+ unsigned int upl_offset_in_object;
+ boolean_t minimal_clustering = FALSE;
+ boolean_t found_dirty;
+
+ if (!dp_encryption_inited) {
+ /*
+ * ENCRYPTED SWAP:
+ * Once we've started using swap, we
+ * can't change our mind on whether
+ * it needs to be encrypted or
+ * not.
+ */
+ dp_encryption_inited = TRUE;
+ }
+ if (dp_encryption) {
+ /*
+ * ENCRYPTED SWAP:
+ * the UPL will need to be encrypted...
+ */
+ flags |= UPL_PAGING_ENCRYPTED;
+ }
+
+ pages_in_cl = 1 << vs->vs_clshift;
+ cl_size = pages_in_cl * vm_page_size;
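+	/*
+	 * with minimal clustering, backing store is only allocated for
+	 * cluster-sized segments that actually contain dirty or precious
+	 * pages... always the case when freezing is configured, or when
+	 * the backing store lives on an SSD
+	 */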
+#if CONFIG_FREEZE
+ minimal_clustering = TRUE;
+#else
+ if (dp_isssd == TRUE)
+ minimal_clustering = TRUE;
+#endif
if (!dp_internal) {
- int page_list_count;
+ unsigned int page_list_count;
int request_flags;
- int super_size;
+ unsigned int super_size;
int first_dirty;
int num_dirty;
int num_of_pages;
int seg_index;
- int pages_in_cl;
- int must_abort;
- vm_offset_t upl_offset;
- vm_offset_t seg_offset;
- vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
- paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+ upl_offset_t upl_offset;
+ upl_offset_t upl_offset_aligned;
+ dp_offset_t seg_offset;
+ dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];
+ paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];
- pages_in_cl = 1 << vs->vs_clshift;
- cl_size = pages_in_cl * vm_page_size;
-
- if (bs_low) {
+ if (bs_low)
super_size = cl_size;
-
- request_flags = UPL_NOBLOCK |
- UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
- UPL_NO_SYNC | UPL_SET_INTERNAL;
- } else {
+ else
super_size = VM_SUPER_CLUSTER;
- request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
- UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
- UPL_NO_SYNC | UPL_SET_INTERNAL;
+ request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
+ UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
+ UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
+
+ if (dp_encryption) {
+ /*
+ * ENCRYPTED SWAP:
+ * request that the UPL be prepared for
+ * encryption.
+ */
+ request_flags |= UPL_ENCRYPT;
+ flags |= UPL_PAGING_ENCRYPTED;
}
page_list_count = 0;
(memory_object_offset_t)offset,
cnt, super_size,
&upl, NULL, &page_list_count,
- request_flags | UPL_PAGEOUT);
+ request_flags | UPL_FOR_PAGEOUT);
+
+ /*
+ * The default pager does not handle objects larger than
+	 * 4GB, so it does not deal with offsets that don't fit in
+	 * 32 bits. Cast down upl->offset now and make sure we
+ * did not lose any valuable bits.
+ */
+ upl_offset_in_object = (unsigned int) upl->offset;
+ assert(upl->offset == upl_offset_in_object);
pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+ seg_size = cl_size - (upl_offset_in_object % cl_size);
+ upl_offset_aligned = upl_offset_in_object & ~(cl_size - 1);
+ page_index = 0;
+ page_max_index = upl->size / PAGE_SIZE;
+ found_dirty = TRUE;
+
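+		/*
+		 * walk the UPL one cluster-sized segment at a time,
+		 * recording the paging segment and offset (psp/ps_offset)
+		 * for each segment that will need to be written
+		 */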
for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) {
- ps_offset[seg_index] = ps_clmap(vs, upl->offset + (seg_index * cl_size),
- &clmap, CL_ALLOC,
- transfer_size < cl_size ?
- transfer_size : cl_size, 0);
+ unsigned int seg_pgcnt;
- if (ps_offset[seg_index] == (vm_offset_t) -1) {
- upl_abort(upl, 0);
- upl_deallocate(upl);
-
- return KERN_FAILURE;
+ seg_pgcnt = seg_size / PAGE_SIZE;
- }
- psp[seg_index] = CLMAP_PS(clmap);
+ if (minimal_clustering == TRUE) {
+ unsigned int non_dirty;
- if (transfer_size > cl_size) {
- transfer_size -= cl_size;
+ non_dirty = 0;
+ found_dirty = FALSE;
+
+ for (; non_dirty < seg_pgcnt; non_dirty++) {
+ if ((page_index + non_dirty) >= page_max_index)
+ break;
+
+ if (UPL_DIRTY_PAGE(pl, page_index + non_dirty) ||
+ UPL_PRECIOUS_PAGE(pl, page_index + non_dirty)) {
+ found_dirty = TRUE;
+ break;
+ }
+ }
+ }
+ if (found_dirty == TRUE) {
+ ps_offset[seg_index] =
+ ps_clmap(vs,
+ upl_offset_aligned,
+ &clmap, CL_ALLOC,
+ cl_size, 0);
+
+ if (ps_offset[seg_index] == (dp_offset_t) -1) {
+ upl_abort(upl, 0);
+ upl_deallocate(upl);
+
+ return KERN_FAILURE;
+ }
+ psp[seg_index] = CLMAP_PS(clmap);
+ }
+ if (transfer_size > seg_size) {
+ page_index += seg_pgcnt;
+ transfer_size -= seg_size;
+ upl_offset_aligned += cl_size;
+ seg_size = cl_size;
seg_index++;
} else
transfer_size = 0;
}
- for (page_index = 0, num_of_pages = upl->size / vm_page_size; page_index < num_of_pages; ) {
+ /*
+ * Ignore any non-present pages at the end of the
+ * UPL.
+ */
+ for (page_index = upl->size / vm_page_size; page_index > 0;) {
+ if (UPL_PAGE_PRESENT(pl, --page_index)) {
+ page_index++;
+ break;
+ }
+ }
+ if (page_index == 0) {
+ /*
+ * no pages in the UPL
+ * abort and return
+ */
+ upl_abort(upl, 0);
+ upl_deallocate(upl);
+
+ return KERN_SUCCESS;
+ }
+ num_of_pages = page_index;
+
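+		/*
+		 * base_index is the page index, within its cluster, of the
+		 * first page in the UPL... it's used below to map UPL page
+		 * indexes onto cluster-sized physical segments
+		 */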
+ base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE;
+
+ for (page_index = 0; page_index < num_of_pages; ) {
/*
* skip over non-dirty pages
*/
for ( ; page_index < num_of_pages; page_index++) {
- if (UPL_DIRTY_PAGE(pl, page_index) || UPL_PRECIOUS_PAGE(pl, page_index))
+ if (UPL_DIRTY_PAGE(pl, page_index)
+ || UPL_PRECIOUS_PAGE(pl, page_index))
/*
* this is a page we need to write
- * go see if we can buddy it up with others
- * that are contiguous to it
+ * go see if we can buddy it up with
+ * others that are contiguous to it
*/
break;
/*
- * if the page is not-dirty, but present we need to commit it...
- * this is an unusual case since we only asked for dirty pages
+			 * if the page is not dirty, but present, we
+ * need to commit it... This is an unusual
+ * case since we only asked for dirty pages
*/
if (UPL_PAGE_PRESENT(pl, page_index)) {
boolean_t empty = FALSE;
vm_page_size,
UPL_COMMIT_NOTIFY_EMPTY,
pl,
- MAX_UPL_TRANSFER,
+ page_list_count,
&empty);
- if (empty)
+ if (empty) {
+ assert(page_index ==
+ num_of_pages - 1);
upl_deallocate(upl);
+ }
}
}
if (page_index == num_of_pages)
break;
/*
- * gather up contiguous dirty pages... we have at least 1
- * otherwise we would have bailed above
+		 * gather up contiguous dirty pages... we have at
+		 * least 1, otherwise we would have bailed above
* make sure that each physical segment that we step
* into is contiguous to the one we're currently in
* if it's not, we have to stop and write what we have
*/
- for (first_dirty = page_index; page_index < num_of_pages; ) {
- if ( !UPL_DIRTY_PAGE(pl, page_index) && !UPL_PRECIOUS_PAGE(pl, page_index))
+ for (first_dirty = page_index;
+ page_index < num_of_pages; ) {
+ if ( !UPL_DIRTY_PAGE(pl, page_index)
+ && !UPL_PRECIOUS_PAGE(pl, page_index))
break;
page_index++;
/*
int cur_seg;
int nxt_seg;
- cur_seg = (page_index - 1) / pages_in_cl;
- nxt_seg = page_index / pages_in_cl;
+ cur_seg = (base_index + (page_index - 1))/pages_in_cl;
+ nxt_seg = (base_index + page_index)/pages_in_cl;
if (cur_seg != nxt_seg) {
if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
- /*
- * if the segment we're about to step into
- * is not contiguous to the one we're currently
- * in, or it's in a different paging file....
- * we stop here and generate the I/O
- */
+ /*
+ * if the segment we're about
+ * to step into is not
+ * contiguous to the one we're
+ * currently in, or it's in a
+ * different paging file....
+ * we stop here and generate
+ * the I/O
+ */
break;
}
}
}
num_dirty = page_index - first_dirty;
- must_abort = 1;
if (num_dirty) {
upl_offset = first_dirty * vm_page_size;
- seg_index = first_dirty / pages_in_cl;
- seg_offset = upl_offset - (seg_index * cl_size);
transfer_size = num_dirty * vm_page_size;
- error = ps_write_file(psp[seg_index], upl, upl_offset,
- ps_offset[seg_index] + seg_offset, transfer_size, flags);
-
- if (error == 0) {
- while (transfer_size) {
- int seg_size;
+ while (transfer_size) {
- if ((seg_size = cl_size - (upl_offset % cl_size)) > transfer_size)
- seg_size = transfer_size;
+ if ((seg_size = cl_size -
+ ((upl_offset_in_object +
+ upl_offset) % cl_size))
+ > transfer_size)
+ seg_size = transfer_size;
- ps_vs_write_complete(vs, upl->offset + upl_offset, seg_size, error);
+ ps_vs_write_complete(
+ vs,
+ (upl_offset_in_object +
+ upl_offset),
+ seg_size, error);
- transfer_size -= seg_size;
- upl_offset += seg_size;
- }
- must_abort = 0;
+ transfer_size -= seg_size;
+ upl_offset += seg_size;
}
- }
- if (must_abort) {
- boolean_t empty = FALSE;
- upl_abort_range(upl,
- first_dirty * vm_page_size,
- num_dirty * vm_page_size,
- UPL_ABORT_NOTIFY_EMPTY,
- &empty);
- if (empty)
- upl_deallocate(upl);
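+				/*
+				 * rewind to the start of the dirty run and
+				 * issue a single write covering all of it
+				 */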
+ upl_offset = first_dirty * vm_page_size;
+ transfer_size = num_dirty * vm_page_size;
+
+ seg_index = (base_index + first_dirty) / pages_in_cl;
+ seg_offset = (upl_offset_in_object + upl_offset) % cl_size;
+
+ error = ps_write_file(psp[seg_index],
+ upl, upl_offset,
+ ps_offset[seg_index]
+ + seg_offset,
+ transfer_size, flags);
}
}
} else {
- assert(cnt <= (vm_page_size << vs->vs_clshift));
+ assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift));
list_size = cnt;
page_index = 0;
&clmap, CL_ALLOC,
transfer_size < cl_size ?
transfer_size : cl_size, 0);
- if(actual_offset == (vm_offset_t) -1) {
+ if(actual_offset == (dp_offset_t) -1) {
error = 1;
break;
}
cnt = MIN(transfer_size,
- CLMAP_NPGS(clmap) * vm_page_size);
+ (unsigned) CLMAP_NPGS(clmap) * vm_page_size);
ps = CLMAP_PS(clmap);
/* Assume that the caller has given us contiguous */
/* pages */
if(cnt) {
+ ps_vs_write_complete(vs, mobj_target_addr,
+ cnt, error);
error = ps_write_file(ps, internal_upl,
0, actual_offset,
cnt, flags);
if (error)
break;
- ps_vs_write_complete(vs, mobj_target_addr,
- cnt, error);
}
if (error)
break;
{
int num_pages;
struct vs_map *vsmap;
- int i, j, k;
+ unsigned int i, j, k;
num_pages = 0;
if (vs->vs_indirect) {
}
}
- return ptoa(num_pages);
+ return ptoa_32(num_pages);
}
-size_t
+unsigned int
ps_vstruct_allocated_pages(
vstruct_t vs,
default_pager_page_t *pages,
- size_t pages_size)
+ unsigned int pages_size)
{
- int num_pages;
+ unsigned int num_pages;
struct vs_map *vsmap;
- vm_offset_t offset;
- int i, j, k;
+ dp_offset_t offset;
+ unsigned int i, j, k;
num_pages = 0;
offset = 0;
ps_vstruct_transfer_from_segment(
vstruct_t vs,
paging_segment_t segment,
-#ifndef ubc_sync_working
- vm_object_t transfer_object)
-#else
upl_t upl)
-#endif
{
struct vs_map *vsmap;
- struct vs_map old_vsmap;
- struct vs_map new_vsmap;
- int i, j, k;
+// struct vs_map old_vsmap;
+// struct vs_map new_vsmap;
+ unsigned int i, j;
VS_LOCK(vs); /* block all work on this vstruct */
/* can't allow the normal multiple write */
VS_UNLOCK(vs);
vs_changed:
if (vs->vs_indirect) {
- int vsmap_size;
- int clmap_off;
+ unsigned int vsmap_size;
+ int clmap_off;
/* loop on indirect maps */
for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
vsmap = vs->vs_imap[i];
(vm_page_size * (j << vs->vs_clshift))
+ clmap_off,
vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
- transfer_object)
-#else
upl)
-#endif
!= KERN_SUCCESS) {
VS_LOCK(vs);
vs->vs_xfer_pending = FALSE;
vs->vs_xfer_pending = FALSE;
VS_UNLOCK(vs);
vs_finish_write(vs);
+
+ if (backing_store_abort_compaction || backing_store_stop_compaction) {
+ backing_store_abort_compaction = FALSE;
+ dprintf(("ps_vstruct_transfer_from_segment - ABORTED\n"));
+ return KERN_FAILURE;
+ }
+ vnode_pager_throttle();
+
VS_LOCK(vs);
vs->vs_xfer_pending = TRUE;
vs_wait_for_sync_writers(vs);
if(vs_cluster_transfer(vs,
vm_page_size * (j << vs->vs_clshift),
vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
- transfer_object) != KERN_SUCCESS) {
-#else
upl) != KERN_SUCCESS) {
-#endif
VS_LOCK(vs);
vs->vs_xfer_pending = FALSE;
VS_UNLOCK(vs);
vs_finish_write(vs);
VS_LOCK(vs);
vs->vs_xfer_pending = TRUE;
- VS_UNLOCK(vs);
vs_wait_for_sync_writers(vs);
vs_start_write(vs);
vs_wait_for_readers(vs);
+ VS_UNLOCK(vs);
if (vs->vs_indirect) {
goto vs_changed;
}
vs_map_t
vs_get_map_entry(
vstruct_t vs,
- vm_offset_t offset)
+ dp_offset_t offset)
{
struct vs_map *vsmap;
- vm_offset_t cluster;
+ dp_offset_t cluster;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) {
long ind_block = cluster/CLMAP_ENTRIES;
kern_return_t
vs_cluster_transfer(
vstruct_t vs,
- vm_offset_t offset,
- vm_size_t cnt,
-#ifndef ubc_sync_working
- vm_object_t transfer_object)
-#else
+ dp_offset_t offset,
+ dp_size_t cnt,
upl_t upl)
-#endif
{
- vm_offset_t actual_offset;
+ dp_offset_t actual_offset;
paging_segment_t ps;
struct clmap clmap;
kern_return_t error = KERN_SUCCESS;
- int size, size_wanted, i;
- unsigned int residual;
- int unavail_size;
- default_pager_thread_t *dpt;
- boolean_t dealloc;
- struct vs_map *vsmap_ptr;
+ unsigned int size, size_wanted;
+ int i;
+ unsigned int residual = 0;
+ unsigned int unavail_size;
+// default_pager_thread_t *dpt;
+// boolean_t dealloc;
+ struct vs_map *vsmap_ptr = NULL;
struct vs_map read_vsmap;
struct vs_map original_read_vsmap;
struct vs_map write_vsmap;
- upl_t sync_upl;
-#ifndef ubc_sync_working
- upl_t upl;
-#endif
-
- vm_offset_t ioaddr;
+// upl_t sync_upl;
+// vm_offset_t ioaddr;
/* vs_cluster_transfer reads in the pages of a cluster and
* then writes these pages back to new backing store. The
vsmap_ptr = vs_get_map_entry(vs, offset);
actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
- if (actual_offset == (vm_offset_t) -1) {
+ if (actual_offset == (dp_offset_t) -1) {
/*
* Nothing left to write in this cluster at least
if (size == 0) {
ASSERT(unavail_size);
+ ps_clunmap(vs, offset, unavail_size);
cnt -= unavail_size;
offset += unavail_size;
if((offset & ((vm_page_size << vs->vs_clshift) - 1))
original_read_vsmap = *vsmap_ptr;
if(ps->ps_segtype == PS_PARTITION) {
+ panic("swap partition not supported\n");
+ /*NOTREACHED*/
+ error = KERN_FAILURE;
+ residual = size;
/*
- NEED TO BE WITH SYNC & NO COMMIT
+ NEED TO ISSUE WITH SYNC & NO COMMIT
error = ps_read_device(ps, actual_offset, &buffer,
size, &residual, flags);
*/
} else {
-#ifndef ubc_sync_working
- int page_list_count = 0;
-
- error = vm_object_upl_request(transfer_object,
-(vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
- size, &upl, NULL, &page_list_count,
- UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
- | UPL_SET_INTERNAL);
- if (error == KERN_SUCCESS) {
- error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
- size, &residual, 0);
- if(error)
- upl_commit(upl, NULL);
- upl_deallocate(upl);
- }
-
-#else
- /* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/
- error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
+ /* NEED TO ISSUE WITH SYNC & NO COMMIT */
+ error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
size, &residual,
- (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD));
-#endif
+ (UPL_IOSYNC | UPL_NOCOMMIT | (dp_encryption ? UPL_PAGING_ENCRYPTED : 0)));
}
read_vsmap = *vsmap_ptr;
*
*/
if ((error == KERN_SUCCESS) && (residual == 0)) {
- int page_list_count = 0;
/*
* Got everything we asked for, supply the data to
/* the vm_map_copy_page_discard call */
*vsmap_ptr = write_vsmap;
-#ifndef ubc_sync_working
- error = vm_object_upl_request(transfer_object,
- (vm_object_offset_t)
- (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
- size, &upl, NULL, &page_list_count,
- UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL);
- if(vs_cluster_write(vs, upl, offset,
- size, TRUE, 0) != KERN_SUCCESS) {
- upl_commit(upl, NULL);
- upl_deallocate(upl);
-#else
if(vs_cluster_write(vs, upl, offset,
size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
-#endif
error = KERN_FAILURE;
if(!(VSM_ISCLR(*vsmap_ptr))) {
/* unmap the new backing store object */
*/
write_vsmap = *vsmap_ptr;
*vsmap_ptr = read_vsmap;
+ ps_clunmap(vs, offset, size);
} else {
/* discard the old backing object */
write_vsmap = *vsmap_ptr;
}
kern_return_t
-default_pager_add_file(MACH_PORT_FACE backing_store,
- int *vp,
+default_pager_add_file(
+ MACH_PORT_FACE backing_store,
+ vnode_ptr_t vp,
int record_size,
- long size)
+ vm_size_t size)
{
backing_store_t bs;
paging_segment_t ps;
int i;
+ unsigned int j;
int error;
if ((bs = backing_store_lookup(backing_store))
ps->ps_vnode = (struct vnode *)vp;
ps->ps_offset = 0;
ps->ps_record_shift = local_log2(vm_page_size / record_size);
- ps->ps_recnum = size;
- ps->ps_pgnum = size >> ps->ps_record_shift;
+ assert((dp_size_t) size == size);
+ ps->ps_recnum = (dp_size_t) size;
+ ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift;
ps->ps_pgcount = ps->ps_pgnum;
ps->ps_clshift = local_log2(bs->bs_clsize);
ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
+ ps->ps_special_clusters = 0;
ps->ps_hint = 0;
PS_LOCK_INIT(ps);
ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
if (!ps->ps_bmap) {
- kfree((vm_offset_t)ps, sizeof *ps);
+ PS_LOCK_DESTROY(ps);
+ kfree(ps, sizeof *ps);
BS_UNLOCK(bs);
return KERN_RESOURCE_SHORTAGE;
}
- for (i = 0; i < ps->ps_ncls; i++) {
- clrbit(ps->ps_bmap, i);
+ for (j = 0; j < ps->ps_ncls; j++) {
+ clrbit(ps->ps_bmap, j);
}
- ps->ps_going_away = FALSE;
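+	/*
+	 * the first paging segment ever added becomes the emergency
+	 * segment... it's only marked usable right away if the
+	 * use_emergency_swap_file_first policy has been set
+	 */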
+ if(paging_segment_count == 0) {
+ ps->ps_state = PS_EMERGENCY_SEGMENT;
+ if(use_emergency_swap_file_first) {
+ ps->ps_state |= PS_CAN_USE;
+ }
+ emergency_segment_backing_store = backing_store;
+ } else {
+ ps->ps_state = PS_CAN_USE;
+ }
+
ps->ps_bs = bs;
if ((error = ps_enter(ps)) != 0) {
- kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
- kfree((vm_offset_t)ps, sizeof *ps);
+ kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+ PS_LOCK_DESTROY(ps);
+ kfree(ps, sizeof *ps);
BS_UNLOCK(bs);
return KERN_RESOURCE_SHORTAGE;
}
bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
PSL_LOCK();
- dp_pages_free += ps->ps_pgcount;
+ if(IS_PS_OK_TO_USE(ps)) {
+ dp_pages_free += ps->ps_pgcount;
+ } else {
+ dp_pages_reserve += ps->ps_pgcount;
+ }
PSL_UNLOCK();
BS_UNLOCK(bs);
bs_more_space(ps->ps_clcount);
- DEBUG(DEBUG_BS_INTERNAL,
- ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
- device, offset, size, record_size,
- ps->ps_record_shift, ps->ps_pgnum));
+ /*
+ * If the paging segment being activated is not the emergency
+ * segment and we notice that the emergency segment is being
+ * used then we help recover it. If all goes well, the
+ * emergency segment will be back to its original state of
+ * online but not activated (till it's needed the next time).
+ */
+#if CONFIG_FREEZE
+ if (!memorystatus_freeze_enabled)
+#endif
+ {
+ ps = paging_segments[EMERGENCY_PSEG_INDEX];
+ if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
+ if(default_pager_backing_store_delete(emergency_segment_backing_store)) {
+ dprintf(("Failed to recover emergency paging segment\n"));
+ } else {
+ dprintf(("Recovered emergency paging segment\n"));
+ }
+ }
+ }
+
+ DP_DEBUG(DEBUG_BS_INTERNAL,
+ ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
+ device, offset, (dp_size_t) size, record_size,
+ ps->ps_record_shift, ps->ps_pgnum));
return KERN_SUCCESS;
}
ps_read_file(
paging_segment_t ps,
upl_t upl,
- vm_offset_t upl_offset,
- vm_offset_t offset,
- unsigned int size,
+ upl_offset_t upl_offset,
+ dp_offset_t offset,
+ upl_size_t size,
unsigned int *residualp,
int flags)
{
int error = 0;
int result;
+ assert(dp_encryption_inited);
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
- /* for transfer case we need to pass uploffset and flags */
- error = vnode_pagein(ps->ps_vnode,
- upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);
+ /*
+ * for transfer case we need to pass uploffset and flags
+ */
+ assert((upl_size_t) size == size);
+ error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL);
/* The vnode_pagein semantic is somewhat at odds with the existing */
/* device_read semantic. Partial reads are not experienced at this */
ps_write_file(
paging_segment_t ps,
upl_t upl,
- vm_offset_t upl_offset,
- vm_offset_t offset,
+ upl_offset_t upl_offset,
+ dp_offset_t offset,
unsigned int size,
int flags)
{
vm_object_offset_t f_offset;
kern_return_t result;
- int error = 0;
+ assert(dp_encryption_inited);
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
- if (vnode_pageout(ps->ps_vnode,
- upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
+ if (flags & UPL_PAGING_ENCRYPTED) {
+ /*
+ * ENCRYPTED SWAP:
+ * encrypt all the pages that we're going
+ * to pageout.
+ */
+ upl_encrypt(upl, upl_offset, size);
+ }
+ assert((upl_size_t) size == size);
+ if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL))
result = KERN_FAILURE;
else
result = KERN_SUCCESS;
return result;
}
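+
+/*
+ * vnode TRIM helpers are stubbed out as no-ops here
+ */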
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data)
+{
+#pragma unused(data)
+}
+
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data)
+{
+#pragma unused(data)
+}
+
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length)
+{
+#pragma unused(data, map, shift, length)
+}
+
kern_return_t
-default_pager_triggers(MACH_PORT_FACE default_pager,
+default_pager_triggers( __unused MACH_PORT_FACE default_pager,
int hi_wat,
int lo_wat,
int flags,
MACH_PORT_FACE trigger_port)
{
- MACH_PORT_FACE release;
+ MACH_PORT_FACE release = IPC_PORT_NULL;
kern_return_t kr;
+ clock_sec_t now;
+ clock_nsec_t nanoseconds_dummy;
+ static clock_sec_t error_notify = 0;
PSL_LOCK();
- if (flags == HI_WAT_ALERT) {
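+	/*
+	 * ENCRYPTED SWAP: the encryption setting can only be established
+	 * once... after dp_encryption_inited is set, later requests to
+	 * turn it on or off are rejected
+	 */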
+ if (flags == SWAP_ENCRYPT_ON) {
+ /* ENCRYPTED SWAP: turn encryption on */
+ release = trigger_port;
+ if (!dp_encryption_inited) {
+ dp_encryption_inited = TRUE;
+ dp_encryption = TRUE;
+ kr = KERN_SUCCESS;
+ } else {
+ kr = KERN_FAILURE;
+ }
+ } else if (flags == SWAP_ENCRYPT_OFF) {
+ /* ENCRYPTED SWAP: turn encryption off */
+ release = trigger_port;
+ if (!dp_encryption_inited) {
+ dp_encryption_inited = TRUE;
+ dp_encryption = FALSE;
+ kr = KERN_SUCCESS;
+ } else {
+ kr = KERN_FAILURE;
+ }
+ } else if (flags == HI_WAT_ALERT) {
release = min_pages_trigger_port;
- min_pages_trigger_port = trigger_port;
- minimum_pages_remaining = hi_wat/vm_page_size;
- bs_low = FALSE;
- kr = KERN_SUCCESS;
+#if CONFIG_FREEZE
+ /* High and low water signals aren't applicable when freeze is */
+ /* enabled, so release the trigger ports here and return */
+ /* KERN_FAILURE. */
+ if (memorystatus_freeze_enabled) {
+ if (IP_VALID( trigger_port )){
+ ipc_port_release_send( trigger_port );
+ }
+ min_pages_trigger_port = IPC_PORT_NULL;
+ kr = KERN_FAILURE;
+ }
+ else
+#endif
+ {
+ min_pages_trigger_port = trigger_port;
+ minimum_pages_remaining = hi_wat/vm_page_size;
+ bs_low = FALSE;
+ kr = KERN_SUCCESS;
+ }
} else if (flags == LO_WAT_ALERT) {
release = max_pages_trigger_port;
- max_pages_trigger_port = trigger_port;
- maximum_pages_free = lo_wat/vm_page_size;
+#if CONFIG_FREEZE
+ if (memorystatus_freeze_enabled) {
+ if (IP_VALID( trigger_port )){
+ ipc_port_release_send( trigger_port );
+ }
+ max_pages_trigger_port = IPC_PORT_NULL;
+ kr = KERN_FAILURE;
+ }
+ else
+#endif
+ {
+ max_pages_trigger_port = trigger_port;
+ maximum_pages_free = lo_wat/vm_page_size;
+ kr = KERN_SUCCESS;
+ }
+ } else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) {
+ use_emergency_swap_file_first = TRUE;
+ release = trigger_port;
kr = KERN_SUCCESS;
+ } else if (flags == SWAP_FILE_CREATION_ERROR) {
+ release = trigger_port;
+ kr = KERN_SUCCESS;
+ if( paging_segment_count == 1) {
+ use_emergency_swap_file_first = TRUE;
+ }
+ no_paging_space_action();
+ clock_get_system_nanotime(&now, &nanoseconds_dummy);
+ if (now > error_notify + 5) {
+ dprintf(("Swap File Error.\n"));
+ error_notify = now;
+ }
} else {
release = trigger_port;
kr = KERN_INVALID_ARGUMENT;
return kr;
}
+
+/*
+ * Monitor the amount of available backing store vs. the amount of
+ * required backing store, notify a listener (if present) when
+ * backing store may safely be removed.
+ *
+ * We attempt to avoid the situation where backing store is
+ * discarded en masse, as this can lead to thrashing as the
+ * backing store is compacted.
+ */
+
+#define PF_INTERVAL 3 /* time between free level checks */
+#define PF_LATENCY 10 /* number of intervals before release */
+
+static int dp_pages_free_low_count = 0;
+thread_call_t default_pager_backing_store_monitor_callout;
+
+void
+default_pager_backing_store_monitor(__unused thread_call_param_t p1,
+ __unused thread_call_param_t p2)
+{
+// unsigned long long average;
+ ipc_port_t trigger;
+ uint64_t deadline;
+
+ /*
+ * We determine whether it will be safe to release some
+ * backing store by watching the free page level. If
+ * it remains below the maximum_pages_free threshold for
+ * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
+ * then we deem it safe.
+ *
+ * Note that this establishes a maximum rate at which backing
+ * store will be released, as each notification (currently)
+ * only results in a single backing store object being
+ * released.
+ */
+ if (dp_pages_free > maximum_pages_free) {
+ dp_pages_free_low_count++;
+ } else {
+ dp_pages_free_low_count = 0;
+ }
+
+ /* decide whether to send notification */
+ trigger = IP_NULL;
+ if (max_pages_trigger_port &&
+ (backing_store_release_trigger_disable == 0) &&
+ (dp_pages_free_low_count > PF_LATENCY)) {
+ trigger = max_pages_trigger_port;
+ max_pages_trigger_port = NULL;
+ }
+
+ /* send notification */
+ if (trigger != IP_NULL) {
+ VSL_LOCK();
+ if(backing_store_release_trigger_disable != 0) {
+ assert_wait((event_t)
+ &backing_store_release_trigger_disable,
+ THREAD_UNINT);
+ VSL_UNLOCK();
+ thread_block(THREAD_CONTINUE_NULL);
+ } else {
+ VSL_UNLOCK();
+ }
+ dprintf(("default_pager_backing_store_monitor - send LO_WAT_ALERT\n"));
+
+ default_pager_space_alert(trigger, LO_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ dp_pages_free_low_count = 0;
+ }
+
+ clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
+ thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
+}
+
+#if CONFIG_FREEZE
+unsigned int
+default_pager_swap_pages_free(void)
+{
+	return dp_pages_free;
+}
+#endif