/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
 * Paging File Management.
*/
+#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>
#include "default_pager_internal.h"
#include <default_pager/default_pager_alerts.h>
/* CDY CDY */
#include <vm/vm_map.h>
-/* MAXPHYS derived from bsd/bsd/ppc/param.h, we need a */
-/* universal originating in the kernel, or a formal means of exporting */
-/* from the bsd component */
+/*
+ * ALLOC_STRIDE... the maximum number of bytes allocated from
+ * a swap file before moving on to the next swap file... if
+ * all swap files reside on a single disk, this value should
+ * be very large (this is the default assumption)... if the
+ * swap files are spread across multiple disks, then this value
+ * should be small (128 * 1024)...
+ *
+ * This should be determined dynamically in the future
+ */
-#define MAXPHYS (64 * 1024)
+#define ALLOC_STRIDE (1024 * 1024 * 1024)
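As a minimal sketch of what the new constant implies (assuming 4 KB pages, i.e. a vm_page_shift of 12, and a 4-page cluster; neither value comes from this diff), the stride translates into the number of clusters handed out from one swap file before the allocator rotates to the next, mirroring the ALLOC_STRIDE >> (ps_clshift + vm_page_shift) test used later in the segment-selection code:

#include <stdio.h>

/* Illustrative assumptions only: 4 KB pages, 4-page clusters. */
#define SKETCH_ALLOC_STRIDE  (1024 * 1024 * 1024)
#define SKETCH_VM_PAGE_SHIFT 12
#define SKETCH_PS_CLSHIFT    2

int main(void)
{
	/* clusters taken from one swap file before moving on to the next */
	int clusters_per_stride =
	    SKETCH_ALLOC_STRIDE >> (SKETCH_PS_CLSHIFT + SKETCH_VM_PAGE_SHIFT);

	printf("clusters per stride: %d\n", clusters_per_stride); /* 65536 */
	return 0;
}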
int physical_transfer_cluster_count = 0;
-#define VM_SUPER_CLUSTER 0x10000
+#define VM_SUPER_CLUSTER 0x40000
+#define VM_SUPER_PAGES 64
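A quick arithmetic check, assuming the usual 4 KB page size (an assumption, not something this diff states): 0x40000 bytes is 256 KB, i.e. exactly 64 pages, so the two new defines stay consistent with each other.

#include <assert.h>

/* Assumption for illustration: 4096-byte pages. */
#define SKETCH_PAGE_SIZE     4096
#define SKETCH_SUPER_CLUSTER 0x40000
#define SKETCH_SUPER_PAGES   64

int main(void)
{
	/* 0x40000 / 4096 == 64 */
	assert(SKETCH_SUPER_CLUSTER / SKETCH_PAGE_SIZE == SKETCH_SUPER_PAGES);
	return 0;
}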
/*
* 0 means no shift to pages, so == 1 page/cluster. 1 would mean
int default_pager_clsize = 0;
/* statistics */
-unsigned int clustered_writes[MAX_CLUSTER_SIZE+1];
-unsigned int clustered_reads[MAX_CLUSTER_SIZE+1];
+unsigned int clustered_writes[VM_SUPER_PAGES+1];
+unsigned int clustered_reads[VM_SUPER_PAGES+1];
/*
* Globals used for asynchronous paging operations:
ipc_port_t max_pages_trigger_port = NULL;
boolean_t bs_low = FALSE;
+int backing_store_release_trigger_disable = 0;
unsigned int cluster_transfer_minimum = 100;
kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int); /* forward */
+kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
+
default_pager_thread_t *
get_read_buffer()
return dpt_array[i];
}
}
- assert_wait(&dpt_array, THREAD_UNINT);
- DPT_UNLOCK(dpt_lock);
- thread_block((void(*)(void))0);
+ DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
}
}
vs_async_free_list = NULL;
#endif /* VS_ASYNC_REUSE */
- for (i = 0; i < MAX_CLUSTER_SIZE+1; i++) {
+ for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
clustered_writes[i] = 0;
clustered_reads[i] = 0;
}
bs_no_paging_space(
boolean_t out_of_memory)
{
- static char here[] = "bs_no_paging_space";
if (out_of_memory)
dprintf(("*** OUT OF MEMORY ***\n"));
vm_size_t pages_total, pages_free;
paging_segment_t ps;
int i;
- static char here[] = "bs_global_info";
PSL_LOCK();
pages_total = pages_free = 0;
backing_store_alloc(void)
{
backing_store_t bs;
- static char here[] = "backing_store_alloc";
bs = (backing_store_t) kalloc(sizeof (struct backing_store));
if (bs == BACKING_STORE_NULL)
MACH_PORT_FACE port = bs->bs_port;
MACH_PORT_FACE pset = default_pager_default_set;
kern_return_t kr = KERN_SUCCESS;
- static char here[] = "backing_store_add";
if (kr != KERN_SUCCESS)
panic("backing_store_add: add to set");
int clsize)
{
int i;
- MACH_PORT_FACE DMM;
+ memory_object_default_t dmm;
kern_return_t kr;
- static char here[] = "bs_get_global_clsize";
/*
* Only allow setting of cluster size once. If called
* paging segments.
*/
if (default_pager_clsize == 0) {
- if (norma_mk) {
- /*
- * On NORMA, don't use clustered paging because
- * XMM can't handle it.
- */
- vstruct_def_clshift = 0;
- }
/*
* Keep cluster size in bit shift because it's quicker
* arithmetic, and easier to keep at a power of 2.
printf("%scluster size = %d page%s\n",
my_name, default_pager_clsize,
(default_pager_clsize == 1) ? "" : "s");
+
/*
* Let the kernel know too, in case it hasn't used the
* default value provided in main() yet.
*/
- DMM = default_pager_default_port;
+ dmm = default_pager_object;
clsize = default_pager_clsize * vm_page_size; /* in bytes */
kr = host_default_memory_manager(host_priv_self(),
- &DMM,
+ &dmm,
clsize);
+ memory_object_default_deallocate(dmm);
+
if (kr != KERN_SUCCESS) {
panic("bs_get_global_cl_size:host_default_memory_manager");
}
- if (DMM != default_pager_default_port) {
+ if (dmm != default_pager_object) {
panic("bs_get_global_cl_size:there is another default pager");
}
}
kern_return_t
default_pager_backing_store_create(
- MACH_PORT_FACE pager,
- int priority,
- int clsize, /* in bytes */
- MACH_PORT_FACE *backing_store)
+ memory_object_default_t pager,
+ int priority,
+ int clsize, /* in bytes */
+ MACH_PORT_FACE *backing_store)
{
backing_store_t bs;
MACH_PORT_FACE port;
kern_return_t kr;
struct vstruct_alias *alias_struct;
- static char here[] = "default_pager_backing_store_create";
- if (pager != default_pager_default_port)
+ if (pager != default_pager_object)
return KERN_INVALID_ARGUMENT;
bs = backing_store_alloc();
priority = BS_MINPRI;
bs->bs_priority = priority;
- bs->bs_clsize = bs_get_global_clsize(atop(clsize));
+ bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
BSL_LOCK();
queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
basic->bs_pages_out_fail= bs->bs_pages_out_fail;
basic->bs_priority = bs->bs_priority;
- basic->bs_clsize = ptoa(bs->bs_clsize); /* in bytes */
+ basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */
BS_UNLOCK(bs);
/* lock and the vs locks are not being held by bumping the */
/* vs_async_pending count. */
+
+ while(backing_store_release_trigger_disable != 0) {
+ VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);
+ }
+
/* we will choose instead to hold a send right */
vs_count = vstruct_list.vsl_count;
vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
error = KERN_FAILURE;
else {
vm_object_t transfer_object;
+ int count;
upl_t upl;
transfer_object = vm_object_allocate(VM_SUPER_CLUSTER);
- error = vm_fault_list_request(transfer_object,
- (vm_object_offset_t)0,
- VM_SUPER_CLUSTER, &upl, NULL,
- 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
- | UPL_SET_INTERNAL);
+ count = 0;
+ error = vm_object_upl_request(transfer_object,
+ (vm_object_offset_t)0, VM_SUPER_CLUSTER,
+ &upl, NULL, &count,
+ UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
+ | UPL_SET_INTERNAL);
if(error == KERN_SUCCESS) {
-#ifndef ubc_sync_working
- uc_upl_commit(upl, NULL);
- error = ps_vstruct_transfer_from_segment(
- vs, ps, transfer_object);
-#else
error = ps_vstruct_transfer_from_segment(
vs, ps, upl);
- uc_upl_commit(upl, NULL);
-#endif
- vm_object_deallocate(transfer_object);
+ upl_commit(upl, NULL);
+ upl_deallocate(upl);
} else {
- vm_object_deallocate(transfer_object);
error = KERN_FAILURE;
}
+ vm_object_deallocate(transfer_object);
}
if(error) {
VS_LOCK(vs);
vs->vs_async_pending -= 1; /* release vs_async_wait */
- if (vs->vs_async_pending == 0) {
+ if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
+ vs->vs_waiting_async = FALSE;
VS_UNLOCK(vs);
- thread_wakeup(&vs->vs_waiting_async);
+ thread_wakeup(&vs->vs_async_pending);
} else {
VS_UNLOCK(vs);
}
}
VSL_LOCK();
+
+ while(backing_store_release_trigger_disable != 0) {
+ VSL_SLEEP(&backing_store_release_trigger_disable,
+ THREAD_UNINT);
+ }
+
next_vs = (vstruct_t) queue_next(&(vs->vs_links));
if((next_vs != (vstruct_t)&vstruct_list) &&
(vs != next_vs) && (vs_count != 1)) {
VSL_UNLOCK();
VS_LOCK(vs);
vs->vs_async_pending -= 1;
- if (vs->vs_async_pending == 0) {
+ if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
+ vs->vs_waiting_async = FALSE;
VS_UNLOCK(vs);
- thread_wakeup(&vs->vs_waiting_async);
+ thread_wakeup(&vs->vs_async_pending);
} else {
VS_UNLOCK(vs);
}
int error;
int interim_pages_removed = 0;
kern_return_t kr;
- static char here[] = "default_pager_backing_store_delete";
if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
return KERN_INVALID_ARGUMENT;
if((void *)bs->bs_port->alias != NULL)
kfree((vm_offset_t) bs->bs_port->alias,
sizeof (struct vstruct_alias));
- pager_mux_hash_delete((ipc_port_t) (bs->bs_port));
ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
bs->bs_port = MACH_PORT_NULL;
BS_UNLOCK(bs);
paging_segment_t ps;
int i;
int error;
- static char here[] = "default_pager_add_segment";
if ((bs = backing_store_lookup(backing_store))
== BACKING_STORE_NULL)
count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
clsize = bs_get_global_clsize(0);
if (!default_pager_backing_store_create(
- default_pager_default_port,
+ default_pager_object,
DEFAULT_PAGER_BACKING_STORE_MAXPRI,
(clsize * vm_page_size),
&bs)) {
vs_free_async(
struct vs_async *vsa)
{
- static char here[] = "vs_free_async";
MACH_PORT_FACE reply_port;
kern_return_t kr;
reply_port = vsa->reply_port;
kfree((vm_offset_t) reply_port->alias, sizeof (struct vstruct_alias));
kfree((vm_offset_t) vsa, sizeof (struct vs_async));
- pager_mux_hash_delete(reply_port);
ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
#if 0
VS_ASYNC_LOCK();
#endif /* VS_ASYNC_REUSE */
+zone_t vstruct_zone;
+
vstruct_t
ps_vstruct_create(
vm_size_t size)
{
vstruct_t vs;
int i;
- static char here[] = "ps_vstruct_create";
- vs = (vstruct_t) kalloc(sizeof (struct vstruct));
+ vs = (vstruct_t) zalloc(vstruct_zone);
if (vs == VSTRUCT_NULL) {
return VSTRUCT_NULL;
}
/*
* The following fields will be provided later.
*/
- vs->vs_mem_obj_port = MACH_PORT_NULL;
+ vs->vs_mem_obj = NULL;
+ vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
+ vs->vs_references = 1;
vs->vs_seqno = 0;
- vs->vs_control_port = MACH_PORT_NULL;
- vs->vs_control_refs = 0;
- vs->vs_object_name = MACH_PORT_NULL;
- vs->vs_name_refs = 0;
#ifdef MACH_KERNEL
vs->vs_waiting_seqno = FALSE;
vs->vs_waiting_read = FALSE;
vs->vs_waiting_write = FALSE;
- vs->vs_waiting_refs = FALSE;
vs->vs_waiting_async = FALSE;
#else
mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO);
vs->vs_errors = 0;
vs->vs_clshift = local_log2(bs_get_global_clsize(0));
- vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
+ vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
vs->vs_async_pending = 0;
/*
paging_segment_t ps;
int i;
int j;
- static char here[] = "ps_select_segment";
/*
* Optimize case where there's only one segment.
PSL_LOCK();
if (paging_segment_count == 1) {
paging_segment_t lps; /* used to avoid extra PS_UNLOCK */
+ ipc_port_t trigger = IP_NULL;
ps = paging_segments[paging_segment_max];
*psindex = paging_segment_max;
dp_pages_free -= 1 << ps->ps_clshift;
if(min_pages_trigger_port &&
(dp_pages_free < minimum_pages_remaining)) {
- default_pager_space_alert(
- min_pages_trigger_port,
- HI_WAT_ALERT);
+ trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
bs_low = TRUE;
}
}
PS_UNLOCK(ps);
PSL_UNLOCK();
+
+ if (trigger != IP_NULL) {
+ default_pager_space_alert(trigger, HI_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ }
return lps;
}
j = start_index+1;
physical_transfer_cluster_count = 0;
}
- else if ((physical_transfer_cluster_count+1) == (MAXPHYS >>
+ else if ((physical_transfer_cluster_count+1) == (ALLOC_STRIDE >>
(((paging_segments[start_index])->ps_clshift)
- + page_shift))) {
+ + vm_page_shift))) {
physical_transfer_cluster_count = 0;
j = start_index + 1;
} else {
/* this segment is being turned off */
} else if ((ps->ps_clcount) &&
(ps->ps_clshift >= shift)) {
+ ipc_port_t trigger = IP_NULL;
+
ps->ps_clcount--;
dp_pages_free -= 1 << ps->ps_clshift;
if(min_pages_trigger_port &&
(dp_pages_free <
minimum_pages_remaining)) {
- default_pager_space_alert(
- min_pages_trigger_port,
- HI_WAT_ALERT);
+ trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
}
PS_UNLOCK(ps);
*/
ps_select_array[i] = j;
PSL_UNLOCK();
+
+ if (trigger != IP_NULL) {
+ default_pager_space_alert(
+ trigger,
+ HI_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ }
*psindex = j;
return ps;
}
int bit_num = 0;
paging_segment_t ps;
vm_offset_t cluster;
- static char here[] = "ps_allocate_cluster";
+ ipc_port_t trigger = IP_NULL;
/*
* Find best paging segment.
ps = use_ps;
PSL_LOCK();
PS_LOCK(ps);
+
+ ASSERT(ps->ps_clcount != 0);
+
ps->ps_clcount--;
dp_pages_free -= 1 << ps->ps_clshift;
- PSL_UNLOCK();
if(min_pages_trigger_port &&
(dp_pages_free < minimum_pages_remaining)) {
- default_pager_space_alert(
- min_pages_trigger_port,
- HI_WAT_ALERT);
+ trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
}
+ PSL_UNLOCK();
PS_UNLOCK(ps);
+ if (trigger != IP_NULL) {
+ default_pager_space_alert(trigger, HI_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ }
+
} else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
PAGING_SEGMENT_NULL) {
#if 0
dprintf(("no space in available paging segments; "
"swapon suggested\n"));
/* the count got off maybe, reset to zero */
+ PSL_LOCK();
dp_pages_free = 0;
if(min_pages_trigger_port) {
- default_pager_space_alert(
- min_pages_trigger_port, HI_WAT_ALERT);
+ trigger = min_pages_trigger_port;
min_pages_trigger_port = NULL;
bs_low = TRUE;
}
+ PSL_UNLOCK();
+ if (trigger != IP_NULL) {
+ default_pager_space_alert(trigger, HI_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ }
return (vm_offset_t) -1;
}
- ASSERT(ps->ps_clcount != 0);
/*
* Look for an available cluster. At the end of the loop,
++ps->ps_clcount;
dp_pages_free += 1 << ps->ps_clshift;
PSL_UNLOCK();
- if(max_pages_trigger_port && (dp_pages_free > maximum_pages_free)) {
- default_pager_space_alert(max_pages_trigger_port, LO_WAT_ALERT);
- max_pages_trigger_port = NULL;
- }
/*
* Move the hint down to the freed cluster if it is
{
int i;
spl_t s;
- static char here[] = "ps_vstruct_dealloc";
VS_MAP_LOCK(vs);
bs_commit(- vs->vs_size);
- ip_lock(vs_to_port(vs));
- (vs_to_port(vs))->ip_destination = 0;
- (vs_to_port(vs))->ip_receiver_name = MACH_PORT_NULL;
-
- s= splsched();
- imq_lock(&vs_to_port(vs)->ip_messages);
- (vs_to_port(vs))->ip_mscount = 0;
- (vs_to_port(vs))->ip_messages.imq_seqno = 0;
- imq_unlock(&vs_to_port(vs)->ip_messages);
- splx(s);
-
- ip_unlock(vs_to_port(vs));
- pager_mux_hash_delete((ipc_port_t) vs_to_port(vs));
- ipc_port_release_receive(vs_to_port(vs));
- /*
- * Do this *after* deallocating the port name
- */
- kfree((vm_offset_t)vs, sizeof *vs);
+ zfree(vstruct_zone, (vm_offset_t)vs);
}
int ps_map_extend(vstruct_t, int); /* forward */
vm_offset_t newoff;
int i;
struct vs_map *vsmap;
- static char here[] = "ps_clmap";
VS_MAP_LOCK(vs);
ASSERT(vs->vs_dmap);
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
/*
* Initialize cluster error value
* relatively quick.
*/
ASSERT(trunc_page(offset) == offset);
- newcl = ptoa(newcl) << vs->vs_clshift;
+ newcl = ptoa_32(newcl) << vs->vs_clshift;
newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
if (flag == CL_ALLOC) {
/*
* set bits in the allocation bitmap according to which
* pages were requested. size is in bytes.
*/
- i = atop(newoff);
+ i = atop_32(newoff);
while ((size > 0) && (i < VSCLSIZE(vs))) {
VSM_SETALLOC(*vsmap, i);
i++;
* Offset is not cluster aligned, so number of pages
* and bitmaps must be adjusted
*/
- clmap->cl_numpages -= atop(newoff);
+ clmap->cl_numpages -= atop_32(newoff);
CLMAP_SHIFT(clmap, vs);
CLMAP_SHIFTALLOC(clmap, vs);
}
} else {
BS_STAT(clmap->cl_ps->ps_bs,
clmap->cl_ps->ps_bs->bs_pages_out_fail +=
- atop(size));
+ atop_32(size));
off = VSM_CLOFF(*vsmap);
VSM_SETERR(*vsmap, error);
}
{
vm_offset_t cluster; /* The cluster number of offset */
struct vs_map *vsmap;
- static char here[] = "ps_clunmap";
VS_MAP_LOCK(vs);
vm_offset_t newoff;
int i;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) /* indirect map */
vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
else
* Not cluster aligned.
*/
ASSERT(trunc_page(newoff) == newoff);
- i = atop(newoff);
+ i = atop_32(newoff);
} else
i = 0;
while ((i < VSCLSIZE(vs)) && (length > 0)) {
boolean_t async,
int error)
{
- static char here[] = "vs_cl_write_complete";
kern_return_t kr;
if (error) {
dprintf(("write failed error = 0x%x\n", error));
/* add upl_abort code here */
} else
- GSTAT(global_stats.gs_pages_out += atop(size));
+ GSTAT(global_stats.gs_pages_out += atop_32(size));
/*
* Notify the vstruct mapping code, so it can do its accounting.
*/
VS_LOCK(vs);
ASSERT(vs->vs_async_pending > 0);
vs->vs_async_pending -= size;
- if (vs->vs_async_pending == 0) {
+ if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
+ vs->vs_waiting_async = FALSE;
VS_UNLOCK(vs);
/* mutex_unlock(&vs->vs_waiting_async); */
- thread_wakeup(&vs->vs_waiting_async);
+ thread_wakeup(&vs->vs_async_pending);
} else {
VS_UNLOCK(vs);
}
io_buf_len_t bytes_written)
{
struct vs_async *vsa;
- static char here[] = "device_write_reply";
vsa = (struct vs_async *)
((struct vstruct_alias *)(reply_port->alias))->vs;
vm_offset_t dev_buffer;
vm_offset_t buf_ptr;
unsigned int records_read;
- static char here[] = "ps_read_device";
struct vs_async *vsa;
mutex_t vs_waiting_read_reply;
default_pager_thread_t *dpt = NULL;
device = dev_port_lookup(ps->ps_device);
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
(mach_msg_type_number_t *) &bytes_read);
if(kr == MIG_NO_REPLY) {
assert_wait(&vsa->vsa_lock, THREAD_UNINT);
- thread_block((void(*)(void))0);
+ thread_block(THREAD_CONTINUE_NULL);
dev_buffer = vsa->vsa_addr;
bytes_read = (unsigned int)vsa->vsa_size;
recnum_t records_written;
kern_return_t kr;
MACH_PORT_FACE reply_port;
- static char here[] = "ps_write_device";
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
"device_write_request returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
/* do the completion notification to free resources */
device_write_reply(reply_port, kr, 0);
return PAGER_ERROR;
"device_write returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
return PAGER_ERROR;
}
if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
vm_offset_t offset,
vm_size_t size)
{
- static char here[] = "pvs_object_data_provided";
DEBUG(DEBUG_VS_INTERNAL,
("buffer=0x%x,offset=0x%x,size=0x%x\n",
upl, offset, size));
ASSERT(size > 0);
- GSTAT(global_stats.gs_pages_in += atop(size));
+ GSTAT(global_stats.gs_pages_in += atop_32(size));
#if USE_PRECIOUS
kern_return_t
pvs_cluster_read(
vstruct_t vs,
- vm_offset_t offset,
+ vm_offset_t vs_offset,
vm_size_t cnt)
{
- vm_offset_t actual_offset;
- vm_offset_t buffer;
- paging_segment_t ps;
- struct clmap clmap;
upl_t upl;
kern_return_t error = KERN_SUCCESS;
- int size, size_wanted, i;
+ int size;
unsigned int residual;
unsigned int request_flags;
- int unavail_size;
- default_pager_thread_t *dpt;
- boolean_t dealloc;
- static char here[] = "pvs_cluster_read";
+ int seg_index;
+ int pages_in_cl;
+ int cl_size;
+ int cl_mask;
+ int cl_index;
+ int xfer_size;
+ vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+ paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+ struct clmap clmap;
+
+ pages_in_cl = 1 << vs->vs_clshift;
+ cl_size = pages_in_cl * vm_page_size;
+ cl_mask = cl_size - 1;
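For orientation, a small sketch of the cluster geometry these three assignments produce, with assumed values (4 KB pages, a vs_clshift of 2) rather than anything taken from the pager itself:

#include <stdio.h>

/* Assumptions for illustration: 4 KB pages, vs_clshift == 2. */
#define SKETCH_VM_PAGE_SIZE 4096
#define SKETCH_VS_CLSHIFT   2

int main(void)
{
	int pages_in_cl = 1 << SKETCH_VS_CLSHIFT;            /* 4 pages  */
	int cl_size     = pages_in_cl * SKETCH_VM_PAGE_SIZE; /* 16384    */
	int cl_mask     = cl_size - 1;                       /* 0x3fff   */
	unsigned int vs_offset = 0x9000;                     /* sample offset */

	/* offset within its cluster, and the cluster-aligned base handed to the map lookup */
	printf("in-cluster offset: 0x%x\n", vs_offset & cl_mask);             /* 0x1000 */
	printf("cluster base:      0x%x\n", vs_offset & ~(unsigned)cl_mask);  /* 0x8000 */
	return 0;
}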
/*
- * This loop will be executed once per cluster referenced.
- * Typically this means once, since it's unlikely that the
- * VM system will ask for anything spanning cluster boundaries.
+ * This loop will be executed multiple times until the entire
+ * request has been satisfied... if the request spans cluster
+ * boundaries, the clusters will be checked for logical continuity,
+ * if contiguous the I/O request will span multiple clusters, otherwise
+ * it will be broken up into the minimal set of I/O's
*
- * If there are holes in a cluster (in a paging segment), we stop
+ * If there are holes in a request (either unallocated pages in a paging
+ * segment or an unallocated paging segment), we stop
* reading at the hole, inform the VM of any data read, inform
* the VM of an unavailable range, then loop again, hoping to
- * find valid pages later in the cluster. This continues until
+ * find valid pages later in the requested range. This continues until
* the entire range has been examined, and read, if present.
*/
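The merge rule described above reduces to a single test, sketched here in isolation (sketch_seg and sketch_contiguous are placeholders, not pager types): two neighbouring clusters can be covered by one I/O only when the second starts exactly cl_size bytes after the first within the same paging segment.

#include <assert.h>
#include <stdbool.h>

/* Placeholder stand-in for paging_segment_t; illustration only. */
struct sketch_seg { int id; };

static bool
sketch_contiguous(unsigned int prev_off, unsigned int next_off,
    const struct sketch_seg *prev_ps, const struct sketch_seg *next_ps,
    unsigned int cl_size)
{
	/* mirrors the (ps_offset[i-1] != ps_offset[i] - cl_size) || (psp[i-1] != psp[i]) test */
	return (prev_off + cl_size == next_off) && (prev_ps == next_ps);
}

int main(void)
{
	struct sketch_seg a, b;

	assert(sketch_contiguous(0x10000, 0x14000, &a, &a, 0x4000));  /* same file, adjacent */
	assert(!sketch_contiguous(0x10000, 0x20000, &a, &a, 0x4000)); /* hole in the file */
	assert(!sketch_contiguous(0x10000, 0x14000, &a, &b, 0x4000)); /* different paging file */
	return 0;
}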
#if USE_PRECIOUS
- request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS;
+ request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
- request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE ;
+ request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif
while (cnt && (error == KERN_SUCCESS)) {
- actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
+ int ps_info_valid;
+ int page_list_count;
+
+ if((vs_offset & cl_mask) &&
+ (cnt > (VM_SUPER_CLUSTER -
+ (vs_offset & cl_mask)))) {
+ size = VM_SUPER_CLUSTER;
+ size -= vs_offset & cl_mask;
+ } else if (cnt > VM_SUPER_CLUSTER) {
+ size = VM_SUPER_CLUSTER;
+ } else {
+ size = cnt;
+ }
+ cnt -= size;
- if (actual_offset == (vm_offset_t) -1) {
+ ps_info_valid = 0;
+ seg_index = 0;
- /*
- * Either a failure due to an error on a previous
- * write or a zero fill on demand page. In either case,
- * optimize to do one reply for all pages up to next
- * cluster boundary.
- */
- unsigned int local_size, clmask, clsize;
+ while (size > 0 && error == KERN_SUCCESS) {
+ int abort_size;
+ int failed_size;
+ int beg_pseg;
+ int beg_indx;
+ vm_offset_t cur_offset;
- clmask = (vm_page_size << vs->vs_clshift) - 1;
- clsize = vm_page_size << vs->vs_clshift;
- clmask = clsize - 1;
- local_size = clsize - (offset & clmask);
- ASSERT(local_size);
- local_size = MIN(local_size, cnt);
- upl_system_list_request((vm_object_t)
- vs->vs_control_port->ip_kobject,
- offset, local_size, local_size,
- &upl, NULL, 0, request_flags);
- if (clmap.cl_error) {
- uc_upl_abort(upl, UPL_ABORT_ERROR);
- } else {
- uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+ if ( !ps_info_valid) {
+ ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
+ psp[seg_index] = CLMAP_PS(clmap);
+ ps_info_valid = 1;
}
+ /*
+ * skip over unallocated physical segments
+ */
+ if (ps_offset[seg_index] == (vm_offset_t) -1) {
+ abort_size = cl_size - (vs_offset & cl_mask);
+ abort_size = MIN(abort_size, size);
+
+ page_list_count = 0;
+ memory_object_super_upl_request(
+ vs->vs_control,
+ (memory_object_offset_t)vs_offset,
+ abort_size, abort_size,
+ &upl, NULL, &page_list_count,
+ request_flags);
- cnt -= local_size;
- offset += local_size;
- continue;
- }
+ if (clmap.cl_error) {
+ upl_abort(upl, UPL_ABORT_ERROR);
+ } else {
+ upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+ }
+ upl_deallocate(upl);
- /*
- * Count up contiguous available or unavailable
- * pages.
- */
- ps = CLMAP_PS(clmap);
- ASSERT(ps);
- size = 0;
- unavail_size = 0;
+ size -= abort_size;
+ vs_offset += abort_size;
- for (i = 0;
- (size < cnt) && (unavail_size < cnt) &&
- (i < CLMAP_NPGS(clmap)); i++) {
- if (CLMAP_ISSET(clmap, i)) {
- if (unavail_size != 0)
- break;
- size += vm_page_size;
- BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_in++);
- } else {
- if (size != 0)
- break;
- unavail_size += vm_page_size;
+ seg_index++;
+ ps_info_valid = 0;
+ continue;
}
- }
- /*
- * Let VM system know about holes in clusters.
- */
- if (size == 0) {
- ASSERT(unavail_size);
- GSTAT(global_stats.gs_pages_unavail +=
- atop(unavail_size));
- upl_system_list_request((vm_object_t)
- vs->vs_control_port->ip_kobject,
- offset, unavail_size,
- unavail_size, &upl, NULL, 0,
+ cl_index = (vs_offset & cl_mask) / vm_page_size;
+
+ for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
+ /*
+ * skip over unallocated pages
+ */
+ if (CLMAP_ISSET(clmap, cl_index))
+ break;
+ abort_size += vm_page_size;
+ }
+ if (abort_size) {
+ /*
+ * Let VM system know about holes in clusters.
+ */
+ GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));
+
+ page_list_count = 0;
+ memory_object_super_upl_request(
+ vs->vs_control,
+ (memory_object_offset_t)vs_offset,
+ abort_size, abort_size,
+ &upl, NULL, &page_list_count,
request_flags);
- uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
- cnt -= unavail_size;
- offset += unavail_size;
- continue;
- }
- upl_system_list_request((vm_object_t)
- vs->vs_control_port->ip_kobject,
- offset, size, size, &upl,
- NULL, 0, request_flags | UPL_SET_INTERNAL);
- if(ps->ps_segtype == PS_PARTITION) {
-/*
- error = ps_read_device(ps, actual_offset, upl,
- size, &residual, 0);
-*/
- } else {
- error = ps_read_file(ps, upl, actual_offset,
- size, &residual, 0);
- }
+ upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+ upl_deallocate(upl);
- /*
- * Adjust counts and send response to VM. Optimize for the
- * common case, i.e. no error and/or partial data.
- * If there was an error, then we need to error the entire
- * range, even if some data was successfully read.
- * If there was a partial read we may supply some
- * data and may error some as well. In all cases the
- * VM must receive some notification for every page in the
- * range.
- */
- if ((error == KERN_SUCCESS) && (residual == 0)) {
+ size -= abort_size;
+ vs_offset += abort_size;
+
+ if (cl_index == pages_in_cl) {
+ /*
+ * if we're at the end of this physical cluster
+ * then bump to the next one and continue looking
+ */
+ seg_index++;
+ ps_info_valid = 0;
+ continue;
+ }
+ if (size == 0)
+ break;
+ }
/*
- * Got everything we asked for, supply the data to
- * the VM. Note that as a side effect of supplying
- * the data, the buffer holding the supplied data is
- * deallocated from the pager's address space.
+ * remember the starting point of the first allocated page
+ * for the I/O we're about to issue
*/
- pvs_object_data_provided(vs, upl, offset, size);
- } else {
- size_wanted = size;
- if (error == KERN_SUCCESS) {
- if (residual == size) {
- /*
+ beg_pseg = seg_index;
+ beg_indx = cl_index;
+ cur_offset = vs_offset;
+
+ /*
+ * calculate the size of the I/O that we can do...
+ * this may span multiple physical segments if
+ * they are contiguous
+ */
+ for (xfer_size = 0; xfer_size < size; ) {
+
+ while (cl_index < pages_in_cl
+ && xfer_size < size) {
+ /*
+ * accumulate allocated pages within
+ * a physical segment
+ */
+ if (CLMAP_ISSET(clmap, cl_index)) {
+ xfer_size += vm_page_size;
+ cur_offset += vm_page_size;
+ cl_index++;
+
+ BS_STAT(psp[seg_index]->ps_bs,
+ psp[seg_index]->ps_bs->bs_pages_in++);
+ } else
+ break;
+ }
+ if (cl_index < pages_in_cl
+ || xfer_size >= size) {
+ /*
+ * we've hit an unallocated page or
+ * the end of this request... go fire
+ * the I/O
+ */
+ break;
+ }
+ /*
+ * we've hit the end of the current physical
+ * segment and there's more to do, so try
+ * moving to the next one
+ */
+ seg_index++;
+
+ ps_offset[seg_index] =
+ ps_clmap(vs,
+ cur_offset & ~cl_mask,
+ &clmap, CL_FIND, 0, 0);
+ psp[seg_index] = CLMAP_PS(clmap);
+ ps_info_valid = 1;
+
+ if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
+ /*
+ * if the physical segment we're about
+ * to step into is not contiguous to
+ * the one we're currently in, or it's
+ * in a different paging file, or
+ * it hasn't been allocated....
+ * we stop here and generate the I/O
+ */
+ break;
+ }
+ /*
+ * start with first page of the next physical
+ * segment
+ */
+ cl_index = 0;
+ }
+ if (xfer_size) {
+ /*
+ * we have a contiguous range of allocated pages
+ * to read from
+ */
+ page_list_count = 0;
+ memory_object_super_upl_request(vs->vs_control,
+ (memory_object_offset_t)vs_offset,
+ xfer_size, xfer_size,
+ &upl, NULL, &page_list_count,
+ request_flags | UPL_SET_INTERNAL);
+
+ error = ps_read_file(psp[beg_pseg],
+ upl, (vm_offset_t) 0,
+ ps_offset[beg_pseg] +
+ (beg_indx * vm_page_size),
+ xfer_size, &residual, 0);
+ } else
+ continue;
+
+ failed_size = 0;
+
+ /*
+ * Adjust counts and send response to VM. Optimize
+ * for the common case, i.e. no error and/or partial
+ * data. If there was an error, then we need to error
+ * the entire range, even if some data was successfully
+ * read. If there was a partial read we may supply some
+ * data and may error some as well. In all cases the
+ * VM must receive some notification for every page
+ * in the range.
+ */
+ if ((error == KERN_SUCCESS) && (residual == 0)) {
+ /*
+ * Got everything we asked for, supply the data
+ * to the VM. Note that as a side effect of
+ * supplying the data, the buffer holding the
+ * supplied data is deallocated from the pager's
+ * address space.
+ */
+ pvs_object_data_provided(
+ vs, upl, vs_offset, xfer_size);
+ } else {
+ failed_size = xfer_size;
+
+ if (error == KERN_SUCCESS) {
+ if (residual == xfer_size) {
+ /*
* If a read operation returns no error
* and no data moved, we turn it into
* an error, assuming we're reading at
* Fall through and error the entire
* range.
*/
- error = KERN_FAILURE;
- } else {
- /*
+ error = KERN_FAILURE;
+ } else {
+ /*
* Otherwise, we have partial read. If
* the part read is an integral number
* of pages, supply it. Otherwise round
* Fall through and error the remainder
* of the range, if any.
*/
- int fill, lsize;
-
- fill = residual & ~vm_page_size;
- lsize = (size - residual) + fill;
- pvs_object_data_provided(vs, upl,
- offset, lsize);
- cnt -= lsize;
- offset += lsize;
- if (size -= lsize) {
- error = KERN_FAILURE;
+ int fill, lsize;
+
+ fill = residual
+ & ~vm_page_size;
+ lsize = (xfer_size - residual)
+ + fill;
+ pvs_object_data_provided(
+ vs, upl,
+ vs_offset, lsize);
+
+ if (lsize < xfer_size) {
+ failed_size =
+ xfer_size - lsize;
+ error = KERN_FAILURE;
+ }
}
- }
+ }
}
-
/*
* If there was an error in any part of the range, tell
- * the VM. Deallocate the remainder of the buffer.
- * Note that error is explicitly checked again since
- * it can be modified above.
+ * the VM. Note that error is explicitly checked again
+ * since it can be modified above.
*/
if (error != KERN_SUCCESS) {
- BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_in_fail +=
- atop(size));
+ BS_STAT(psp[beg_pseg]->ps_bs,
+ psp[beg_pseg]->ps_bs->bs_pages_in_fail
+ += atop_32(failed_size));
}
+ size -= xfer_size;
+ vs_offset += xfer_size;
}
- cnt -= size;
- offset += size;
} /* END while (cnt && (error == 0)) */
return error;
boolean_t dp_internal,
int flags)
{
- vm_offset_t actual_offset; /* Offset within paging segment */
vm_offset_t size;
vm_offset_t transfer_size;
- vm_offset_t subx_size;
int error = 0;
struct clmap clmap;
+
+ vm_offset_t actual_offset; /* Offset within paging segment */
paging_segment_t ps;
+ vm_offset_t subx_size;
+ vm_offset_t mobj_base_addr;
+ vm_offset_t mobj_target_addr;
+ int mobj_size;
+
struct vs_async *vsa;
vm_map_copy_t copy;
- static char here[] = "vs_cluster_write";
upl_t upl;
- upl_page_info_t *page_list;
- upl_page_info_t pl[20];
- vm_offset_t mobj_base_addr;
- vm_offset_t mobj_target_addr;
- int mobj_size;
+ upl_page_info_t *pl;
int page_index;
int list_size;
+ int pages_in_cl;
int cl_size;
+ int base_index;
+ int seg_size;
+ pages_in_cl = 1 << vs->vs_clshift;
+ cl_size = pages_in_cl * vm_page_size;
- ps = PAGING_SEGMENT_NULL;
-
if (!dp_internal) {
+ int page_list_count;
int request_flags;
int super_size;
+ int first_dirty;
+ int num_dirty;
+ int num_of_pages;
+ int seg_index;
vm_offset_t upl_offset;
+ vm_offset_t seg_offset;
+ vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
+ paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
- cl_size = (1 << vs->vs_clshift) * vm_page_size;
if (bs_low) {
super_size = cl_size;
+
request_flags = UPL_NOBLOCK |
UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
UPL_NO_SYNC | UPL_SET_INTERNAL;
} else {
super_size = VM_SUPER_CLUSTER;
+
request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
UPL_NO_SYNC | UPL_SET_INTERNAL;
}
+ page_list_count = 0;
+ memory_object_super_upl_request(vs->vs_control,
+ (memory_object_offset_t)offset,
+ cnt, super_size,
+ &upl, NULL, &page_list_count,
+ request_flags | UPL_FOR_PAGEOUT);
- upl_system_list_request((vm_object_t)
- vs->vs_control_port->ip_kobject,
- offset, cnt, super_size,
- &upl, NULL,
- 0, request_flags);
+ pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
- mobj_base_addr = upl->offset;
- list_size = upl->size;
+ seg_size = cl_size - (upl->offset % cl_size);
+ upl_offset = upl->offset & ~(cl_size - 1);
- page_list = UPL_GET_INTERNAL_PAGE_LIST(upl);
- memcpy(pl, page_list,
- sizeof(upl_page_info_t) * (list_size/page_size));
+ for (seg_index = 0, transfer_size = upl->size;
+ transfer_size > 0; ) {
+ ps_offset[seg_index] =
+ ps_clmap(vs,
+ upl_offset,
+ &clmap, CL_ALLOC,
+ cl_size, 0);
- /* Now parcel up the 64k transfer, do at most cluster size */
- /* at a time. */
- upl_offset = 0;
- page_index = 0;
- mobj_target_addr = mobj_base_addr;
+ if (ps_offset[seg_index] == (vm_offset_t) -1) {
+ upl_abort(upl, 0);
+ upl_deallocate(upl);
+
+ return KERN_FAILURE;
- for (transfer_size = list_size; transfer_size != 0;) {
- actual_offset = ps_clmap(vs, mobj_target_addr,
- &clmap, CL_ALLOC,
- transfer_size < cl_size ?
- transfer_size : cl_size, 0);
+ }
+ psp[seg_index] = CLMAP_PS(clmap);
- if (actual_offset == (vm_offset_t) -1) {
- for(;transfer_size != 0;) {
- if(UPL_PAGE_PRESENT(pl, page_index)) {
- uc_upl_abort_range(upl,
- upl_offset,
- transfer_size,
- UPL_ABORT_FREE_ON_EMPTY);
- break;
- }
- transfer_size-=page_size;
- upl_offset += vm_page_size;
- page_index++;
- }
- error = 1;
+ if (transfer_size > seg_size) {
+ transfer_size -= seg_size;
+ upl_offset += cl_size;
+ seg_size = cl_size;
+ seg_index++;
+ } else
+ transfer_size = 0;
+ }
+ /*
+ * Ignore any non-present pages at the end of the
+ * UPL.
+ */
+ for (page_index = upl->size / vm_page_size; page_index > 0;)
+ if (UPL_PAGE_PRESENT(pl, --page_index))
break;
+ num_of_pages = page_index + 1;
+
+ base_index = (upl->offset % cl_size) / PAGE_SIZE;
+
+ for (page_index = 0; page_index < num_of_pages; ) {
+ /*
+ * skip over non-dirty pages
+ */
+ for ( ; page_index < num_of_pages; page_index++) {
+ if (UPL_DIRTY_PAGE(pl, page_index)
+ || UPL_PRECIOUS_PAGE(pl, page_index))
+ /*
+ * this is a page we need to write
+ * go see if we can buddy it up with
+ * others that are contiguous to it
+ */
+ break;
+ /*
+ * if the page is not dirty, but present, we
+ * need to commit it... This is an unusual
+ * case since we only asked for dirty pages
+ */
+ if (UPL_PAGE_PRESENT(pl, page_index)) {
+ boolean_t empty = FALSE;
+ upl_commit_range(upl,
+ page_index * vm_page_size,
+ vm_page_size,
+ UPL_COMMIT_NOTIFY_EMPTY,
+ pl,
+ page_list_count,
+ &empty);
+ if (empty) {
+ assert(page_index ==
+ num_of_pages - 1);
+ upl_deallocate(upl);
+ }
+ }
}
- cnt = MIN(transfer_size,
- CLMAP_NPGS(clmap) * vm_page_size);
- ps = CLMAP_PS(clmap);
+ if (page_index == num_of_pages)
+ /*
+ * no more pages to look at, we're out of here
+ */
+ break;
- while (cnt > 0) {
- /* attempt to send entire cluster */
- subx_size = 0;
-
- while (cnt > 0) {
- /* do the biggest contiguous transfer of dirty */
- /* pages */
- if (UPL_DIRTY_PAGE(pl, page_index) ||
- UPL_PRECIOUS_PAGE(pl, page_index)){
- page_index++;
- subx_size += vm_page_size;
- cnt -= vm_page_size;
- } else {
- if (subx_size == 0) {
- actual_offset += vm_page_size;
- mobj_target_addr += vm_page_size;
-
- if(UPL_PAGE_PRESENT(pl, page_index)) {
- uc_upl_commit_range(upl,
- upl_offset,
- vm_page_size,
- TRUE, pl);
+ /*
+ * gather up contiguous dirty pages... we have at
+ * least 1, otherwise we would have bailed above
+ * make sure that each physical segment that we step
+ * into is contiguous to the one we're currently in
+ * if it's not, we have to stop and write what we have
+ */
+ for (first_dirty = page_index;
+ page_index < num_of_pages; ) {
+ if ( !UPL_DIRTY_PAGE(pl, page_index)
+ && !UPL_PRECIOUS_PAGE(pl, page_index))
+ break;
+ page_index++;
+ /*
+ * if we just looked at the last page in the UPL
+ * we don't need to check for physical segment
+ * continuity
+ */
+ if (page_index < num_of_pages) {
+ int cur_seg;
+ int nxt_seg;
+
+ cur_seg = (base_index + (page_index - 1))/pages_in_cl;
+ nxt_seg = (base_index + page_index)/pages_in_cl;
+
+ if (cur_seg != nxt_seg) {
+ if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
+ /*
+ * if the segment we're about
+ * to step into is not
+ * contiguous to the one we're
+ * currently in, or it's in a
+ * different paging file....
+ * we stop here and generate
+ * the I/O
+ */
+ break;
}
+ }
+ }
+ num_dirty = page_index - first_dirty;
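As an aside, the dirty-run gathering above can be boiled down to the toy sketch below (a plain bool array stands in for the UPL page list and its UPL_DIRTY_PAGE test; the physical-segment continuity check is left out):

#include <stdio.h>
#include <stdbool.h>

/* Toy stand-in for UPL_DIRTY_PAGE(pl, i): a plain array of dirty flags. */
static bool sketch_dirty[] = { false, true, true, true, false, true };
#define SKETCH_NPAGES (sizeof(sketch_dirty) / sizeof(sketch_dirty[0]))

int main(void)
{
	unsigned int page_index = 0, first_dirty, num_dirty;

	/* skip over non-dirty pages */
	while (page_index < SKETCH_NPAGES && !sketch_dirty[page_index])
		page_index++;

	/* gather up the contiguous dirty run that follows */
	first_dirty = page_index;
	while (page_index < SKETCH_NPAGES && sketch_dirty[page_index])
		page_index++;
	num_dirty = page_index - first_dirty;

	printf("first_dirty=%u num_dirty=%u\n", first_dirty, num_dirty); /* 1 and 3 */
	return 0;
}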
- upl_offset += vm_page_size;
- transfer_size -= vm_page_size;
- page_index++;
- cnt -= vm_page_size;
- } else {
- break;
- }
- }
- }
- if (subx_size) {
-
- error = ps_write_file(ps, upl, upl_offset,
- actual_offset, subx_size, flags);
- if (error) {
- actual_offset += subx_size;
- mobj_target_addr += subx_size;
- upl_offset += subx_size;
- transfer_size -= subx_size;
-
- for(;transfer_size != 0;) {
- if(UPL_PAGE_PRESENT(pl, page_index)) {
- uc_upl_abort_range(upl,
- upl_offset,
- transfer_size,
- UPL_ABORT_FREE_ON_EMPTY);
- break;
- }
- transfer_size-=page_size;
- upl_offset += vm_page_size;
- page_index++;
- }
- break;
+ if (num_dirty) {
+ upl_offset = first_dirty * vm_page_size;
+ transfer_size = num_dirty * vm_page_size;
+
+ while (transfer_size) {
+
+ if ((seg_size = cl_size -
+ ((upl->offset + upl_offset) % cl_size))
+ > transfer_size)
+ seg_size = transfer_size;
+
+ ps_vs_write_complete(vs,
+ upl->offset + upl_offset,
+ seg_size, error);
+
+ transfer_size -= seg_size;
+ upl_offset += seg_size;
}
+ upl_offset = first_dirty * vm_page_size;
+ transfer_size = num_dirty * vm_page_size;
- ps_vs_write_complete(vs, mobj_target_addr,
- subx_size, error);
- }
- actual_offset += subx_size;
- mobj_target_addr += subx_size;
- upl_offset += subx_size;
+ seg_index = (base_index + first_dirty) / pages_in_cl;
+ seg_offset = (upl->offset + upl_offset) % cl_size;
- transfer_size -= subx_size;
- subx_size = 0;
+ error = ps_write_file(psp[seg_index],
+ upl, upl_offset,
+ ps_offset[seg_index]
+ + seg_offset,
+ transfer_size, flags);
+ } else {
+ boolean_t empty = FALSE;
+ upl_abort_range(upl,
+ first_dirty * vm_page_size,
+ num_dirty * vm_page_size,
+ UPL_ABORT_NOTIFY_EMPTY,
+ &empty);
+ if (empty) {
+ assert(page_index == num_of_pages);
+ upl_deallocate(upl);
+ }
}
- if (error)
- break;
}
+
} else {
assert(cnt <= (vm_page_size << vs->vs_clshift));
list_size = cnt;
/* Assume that the caller has given us contiguous */
/* pages */
if(cnt) {
+ ps_vs_write_complete(vs, mobj_target_addr,
+ cnt, error);
error = ps_write_file(ps, internal_upl,
0, actual_offset,
cnt, flags);
if (error)
break;
- ps_vs_write_complete(vs, mobj_target_addr,
- cnt, error);
}
if (error)
break;
}
}
- return ptoa(num_pages);
+ return ptoa_32(num_pages);
}
size_t
ps_vstruct_transfer_from_segment(
vstruct_t vs,
paging_segment_t segment,
-#ifndef ubc_sync_working
- vm_object_t transfer_object)
-#else
upl_t upl)
-#endif
{
struct vs_map *vsmap;
struct vs_map old_vsmap;
(vm_page_size * (j << vs->vs_clshift))
+ clmap_off,
vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
- transfer_object)
-#else
upl)
-#endif
!= KERN_SUCCESS) {
VS_LOCK(vs);
vs->vs_xfer_pending = FALSE;
vs_finish_write(vs);
VS_LOCK(vs);
vs->vs_xfer_pending = TRUE;
- VS_UNLOCK(vs);
vs_wait_for_sync_writers(vs);
vs_start_write(vs);
vs_wait_for_readers(vs);
+ VS_UNLOCK(vs);
if (!(vs->vs_indirect)) {
goto vs_changed;
}
if(vs_cluster_transfer(vs,
vm_page_size * (j << vs->vs_clshift),
vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
- transfer_object) != KERN_SUCCESS) {
-#else
upl) != KERN_SUCCESS) {
-#endif
VS_LOCK(vs);
vs->vs_xfer_pending = FALSE;
VS_UNLOCK(vs);
struct vs_map *vsmap;
vm_offset_t cluster;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) {
long ind_block = cluster/CLMAP_ENTRIES;
vstruct_t vs,
vm_offset_t offset,
vm_size_t cnt,
-#ifndef ubc_sync_working
- vm_object_t transfer_object)
-#else
upl_t upl)
-#endif
{
vm_offset_t actual_offset;
paging_segment_t ps;
struct vs_map original_read_vsmap;
struct vs_map write_vsmap;
upl_t sync_upl;
-#ifndef ubc_sync_working
- upl_t upl;
-#endif
-
vm_offset_t ioaddr;
- static char here[] = "vs_cluster_transfer";
-
/* vs_cluster_transfer reads in the pages of a cluster and
* then writes these pages back to new backing store. The
* segment the pages are being read from is assumed to have
* in effect.
*/
- /* uc_upl_map(kernel_map, upl, &ioaddr); */
-
VSM_CLR(write_vsmap);
VSM_CLR(original_read_vsmap);
/* grab the actual object's pages to sync with I/O */
if(ps->ps_segtype == PS_PARTITION) {
/*
- NEED TO BE WITH SYNC & NO COMMIT
+ NEED TO ISSUE WITH SYNC & NO COMMIT
error = ps_read_device(ps, actual_offset, &buffer,
size, &residual, flags);
*/
} else {
-#ifndef ubc_sync_working
- error = vm_fault_list_request(transfer_object,
-(vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
- size, &upl, NULL,
- 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
- | UPL_SET_INTERNAL);
- if (error == KERN_SUCCESS) {
- error = ps_read_file(ps, upl, actual_offset,
- size, &residual, 0);
- if(error)
- uc_upl_commit(upl, NULL);
- }
-
-#else
- /* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/
- error = ps_read_file(ps, upl, actual_offset,
+ /* NEED TO ISSUE WITH SYNC & NO COMMIT */
+ error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
size, &residual,
- (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD));
-#endif
+ (UPL_IOSYNC | UPL_NOCOMMIT));
}
read_vsmap = *vsmap_ptr;
*
*/
if ((error == KERN_SUCCESS) && (residual == 0)) {
+ int page_list_count = 0;
+
/*
* Got everything we asked for, supply the data to
* the new BS. Note that as a side effect of supplying
/* the vm_map_copy_page_discard call */
*vsmap_ptr = write_vsmap;
-#ifndef ubc_sync_working
- error = vm_fault_list_request(transfer_object,
-(vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
- size, &upl, NULL,
- 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
- | UPL_SET_INTERNAL);
- if(vs_cluster_write(vs, upl, offset,
- size, TRUE, 0) != KERN_SUCCESS) {
- uc_upl_commit(upl, NULL);
-#else
if(vs_cluster_write(vs, upl, offset,
size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
-#endif
error = KERN_FAILURE;
if(!(VSM_ISCLR(*vsmap_ptr))) {
/* unmap the new backing store object */
if(!VSM_ISCLR(write_vsmap))
*vsmap_ptr = write_vsmap;
- /* uc_upl_un_map(kernel_map, upl); */
return error;
}
paging_segment_t ps;
int i;
int error;
- static char here[] = "default_pager_add_file";
if ((bs = backing_store_lookup(backing_store))
== BACKING_STORE_NULL)
-kern_return_t ps_read_file(paging_segment_t, upl_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
-
kern_return_t
ps_read_file(
paging_segment_t ps,
upl_t upl,
+ vm_offset_t upl_offset,
vm_offset_t offset,
unsigned int size,
unsigned int *residualp,
vm_object_offset_t f_offset;
int error = 0;
int result;
- static char here[] = "ps_read_file";
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
/* for transfer case we need to pass uploffset and flags */
error = vnode_pagein(ps->ps_vnode,
- upl, (vm_offset_t)0, f_offset, (vm_size_t)size, flags, NULL);
+ upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);
/* The vnode_pagein semantic is somewhat at odds with the existing */
/* device_read semantic. Partial reads are not experienced at this */
result = KERN_SUCCESS;
}
return result;
-
}
kern_return_t
{
vm_object_offset_t f_offset;
kern_return_t result;
- static char here[] = "ps_write_file";
int error = 0;
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
if (vnode_pageout(ps->ps_vnode,
int flags,
MACH_PORT_FACE trigger_port)
{
+ MACH_PORT_FACE release;
+ kern_return_t kr;
- if(flags & HI_WAT_ALERT) {
- if(min_pages_trigger_port)
- ipc_port_release_send(min_pages_trigger_port);
+ PSL_LOCK();
+ if (flags == HI_WAT_ALERT) {
+ release = min_pages_trigger_port;
min_pages_trigger_port = trigger_port;
minimum_pages_remaining = hi_wat/vm_page_size;
bs_low = FALSE;
- }
- if(flags & LO_WAT_ALERT) {
- if(max_pages_trigger_port)
- ipc_port_release_send(max_pages_trigger_port);
+ kr = KERN_SUCCESS;
+ } else if (flags == LO_WAT_ALERT) {
+ release = max_pages_trigger_port;
max_pages_trigger_port = trigger_port;
maximum_pages_free = lo_wat/vm_page_size;
+ kr = KERN_SUCCESS;
+ } else {
+ release = trigger_port;
+ kr = KERN_INVALID_ARGUMENT;
}
+ PSL_UNLOCK();
+
+ if (IP_VALID(release))
+ ipc_port_release_send(release);
+
+ return kr;
+}
+
+/*
+ * Monitor the amount of available backing store vs. the amount of
+ * required backing store, notify a listener (if present) when
+ * backing store may safely be removed.
+ *
+ * We attempt to avoid the situation where backing store is
+ * discarded en masse, as this can lead to thrashing as the
+ * backing store is compacted.
+ */
+
+#define PF_INTERVAL 3 /* time between free level checks */
+#define PF_LATENCY 10 /* number of intervals before release */
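A back-of-the-envelope reading of these two constants: the monitor samples the free level every PF_INTERVAL seconds and only sends the LO_WAT_ALERT once the level has stayed above maximum_pages_free for more than PF_LATENCY consecutive samples, so a release notification needs roughly half a minute of sustained surplus.

#include <stdio.h>

#define SKETCH_PF_INTERVAL 3   /* seconds between free-level checks */
#define SKETCH_PF_LATENCY  10  /* consecutive checks required before release */

int main(void)
{
	/* the count must exceed PF_LATENCY, i.e. at least 11 qualifying checks */
	printf("minimum sustained time: %d seconds\n",
	    (SKETCH_PF_LATENCY + 1) * SKETCH_PF_INTERVAL); /* 33 */
	return 0;
}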
+
+static int dp_pages_free_low_count = 0;
+
+void
+default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t p2)
+{
+ unsigned long long average;
+ ipc_port_t trigger;
+ uint64_t deadline;
+
+ /*
+ * We determine whether it will be safe to release some
+ * backing store by watching the free page level. If
+ * it remains above the maximum_pages_free threshold for
+ * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
+ * then we deem it safe.
+ *
+ * Note that this establishes a maximum rate at which backing
+ * store will be released, as each notification (currently)
+ * only results in a single backing store object being
+ * released.
+ */
+ if (dp_pages_free > maximum_pages_free) {
+ dp_pages_free_low_count++;
+ } else {
+ dp_pages_free_low_count = 0;
+ }
+
+ /* decide whether to send notification */
+ trigger = IP_NULL;
+ if (max_pages_trigger_port &&
+ (backing_store_release_trigger_disable == 0) &&
+ (dp_pages_free_low_count > PF_LATENCY)) {
+ trigger = max_pages_trigger_port;
+ max_pages_trigger_port = NULL;
+ }
+
+ /* send notification */
+ if (trigger != IP_NULL) {
+ VSL_LOCK();
+ if(backing_store_release_trigger_disable != 0) {
+ assert_wait((event_t)
+ &backing_store_release_trigger_disable,
+ THREAD_UNINT);
+ VSL_UNLOCK();
+ thread_block(THREAD_CONTINUE_NULL);
+ } else {
+ VSL_UNLOCK();
+ }
+ default_pager_space_alert(trigger, LO_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ dp_pages_free_low_count = 0;
+ }
+
+ clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
+ thread_call_func_delayed(default_pager_backing_store_monitor, NULL, deadline);
}