/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
priority = BS_MINPRI;
bs->bs_priority = priority;
- bs->bs_clsize = bs_get_global_clsize(atop(clsize));
+ bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
BSL_LOCK();
queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
basic->bs_pages_out_fail= bs->bs_pages_out_fail;
basic->bs_priority = bs->bs_priority;
- basic->bs_clsize = ptoa(bs->bs_clsize); /* in bytes */
+ basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */
BS_UNLOCK(bs);
vs->vs_errors = 0;
vs->vs_clshift = local_log2(bs_get_global_clsize(0));
- vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
+ vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
vs->vs_async_pending = 0;
/*
ps = use_ps;
PSL_LOCK();
PS_LOCK(ps);
+
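+ /* the segment we were handed must still have a free cluster */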
+ ASSERT(ps->ps_clcount != 0);
+
ps->ps_clcount--;
dp_pages_free -= 1 << ps->ps_clshift;
if(min_pages_trigger_port &&
}
return (vm_offset_t) -1;
}
- ASSERT(ps->ps_clcount != 0);
/*
* Look for an available cluster. At the end of the loop,
paging_segment_t ps,
vm_offset_t cluster)
{
- ipc_port_t trigger = IP_NULL;
if (cluster >= (vm_offset_t) ps->ps_ncls)
panic("ps_deallocate_cluster: Invalid cluster number");
clrbit(ps->ps_bmap, cluster);
++ps->ps_clcount;
dp_pages_free += 1 << ps->ps_clshift;
- if(max_pages_trigger_port
- && (backing_store_release_trigger_disable == 0)
- && (dp_pages_free > maximum_pages_free)) {
- trigger = max_pages_trigger_port;
- max_pages_trigger_port = NULL;
- }
PSL_UNLOCK();
/*
ps_select_array[ps->ps_bs->bs_priority] = 0;
PSL_UNLOCK();
- if (trigger != IP_NULL) {
- VSL_LOCK();
- if(backing_store_release_trigger_disable != 0) {
- assert_wait((event_t)
- &backing_store_release_trigger_disable,
- THREAD_UNINT);
- VSL_UNLOCK();
- thread_block(THREAD_CONTINUE_NULL);
- } else {
- VSL_UNLOCK();
- }
- default_pager_space_alert(trigger, LO_WAT_ALERT);
- ipc_port_release_send(trigger);
- }
-
return;
}
VS_MAP_LOCK(vs);
ASSERT(vs->vs_dmap);
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
/*
* Initialize cluster error value
* relatively quick.
*/
ASSERT(trunc_page(offset) == offset);
- newcl = ptoa(newcl) << vs->vs_clshift;
+ newcl = ptoa_32(newcl) << vs->vs_clshift;
newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
if (flag == CL_ALLOC) {
/*
* set bits in the allocation bitmap according to which
* pages were requested. size is in bytes.
*/
- i = atop(newoff);
+ i = atop_32(newoff);
while ((size > 0) && (i < VSCLSIZE(vs))) {
VSM_SETALLOC(*vsmap, i);
i++;
* Offset is not cluster aligned, so number of pages
* and bitmaps must be adjusted
*/
- clmap->cl_numpages -= atop(newoff);
+ clmap->cl_numpages -= atop_32(newoff);
CLMAP_SHIFT(clmap, vs);
CLMAP_SHIFTALLOC(clmap, vs);
}
} else {
BS_STAT(clmap->cl_ps->ps_bs,
clmap->cl_ps->ps_bs->bs_pages_out_fail +=
- atop(size));
+ atop_32(size));
off = VSM_CLOFF(*vsmap);
VSM_SETERR(*vsmap, error);
}
vm_offset_t newoff;
int i;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) /* indirect map */
vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
else
* Not cluster aligned.
*/
ASSERT(trunc_page(newoff) == newoff);
- i = atop(newoff);
+ i = atop_32(newoff);
} else
i = 0;
while ((i < VSCLSIZE(vs)) && (length > 0)) {
dprintf(("write failed error = 0x%x\n", error));
/* add upl_abort code here */
} else
- GSTAT(global_stats.gs_pages_out += atop(size));
+ GSTAT(global_stats.gs_pages_out += atop_32(size));
/*
* Notify the vstruct mapping code, so it can do its accounting.
*/
default_pager_thread_t *dpt = NULL;
device = dev_port_lookup(ps->ps_device);
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
dev_offset = (ps->ps_offset +
(offset >> (vm_page_shift - ps->ps_record_shift)));
"device_write_request returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
/* do the completion notification to free resources */
device_write_reply(reply_port, kr, 0);
return PAGER_ERROR;
"device_write returned ",
kr, addr, size, offset));
BS_STAT(ps->ps_bs,
- ps->ps_bs->bs_pages_out_fail += atop(size));
+ ps->ps_bs->bs_pages_out_fail += atop_32(size));
return PAGER_ERROR;
}
if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
upl, offset, size));
ASSERT(size > 0);
- GSTAT(global_stats.gs_pages_in += atop(size));
+ GSTAT(global_stats.gs_pages_in += atop_32(size));
#if USE_PRECIOUS
/*
* Let VM system know about holes in clusters.
*/
- GSTAT(global_stats.gs_pages_unavail += atop(abort_size));
+ GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));
page_list_count = 0;
memory_object_super_upl_request(
while (cl_index < pages_in_cl
&& xfer_size < size) {
/*
- * accumulate allocated pages within
+ * accumulate allocated pages within
* a physical segment
*/
if (CLMAP_ISSET(clmap, cl_index)) {
if (cl_index < pages_in_cl
|| xfer_size >= size) {
/*
- * we've hit an unallocated page or
+ * we've hit an unallocated page or
* the end of this request... go fire
* the I/O
*/
}
/*
* we've hit the end of the current physical
- * segment and there's more to do, so try
+ * segment and there's more to do, so try
* moving to the next one
*/
seg_index++;
ps_offset[seg_index] =
- ps_clmap(vs,
- cur_offset & ~cl_mask,
+ ps_clmap(vs,
+ cur_offset & ~cl_mask,
&clmap, CL_FIND, 0, 0);
psp[seg_index] = CLMAP_PS(clmap);
ps_info_valid = 1;
if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
/*
- * if the physical segment we're about
- * to step into is not contiguous to
- * the one we're currently in, or it's
+ * if the physical segment we're about
+ * to step into is not contiguous to
+ * the one we're currently in, or it's
* in a different paging file, or
* it hasn't been allocated....
* we stop here and generate the I/O
}
/*
* start with first page of the next physical
- * segment
+ * segment
*/
cl_index = 0;
}
&upl, NULL, &page_list_count,
request_flags | UPL_SET_INTERNAL);
- error = ps_read_file(psp[beg_pseg],
+ error = ps_read_file(psp[beg_pseg],
upl, (vm_offset_t) 0,
- ps_offset[beg_pseg] +
- (beg_indx * vm_page_size),
+ ps_offset[beg_pseg] +
+ (beg_indx * vm_page_size),
xfer_size, &residual, 0);
} else
continue;
failed_size = 0;
/*
- * Adjust counts and send response to VM. Optimize
+ * Adjust counts and send response to VM. Optimize
* for the common case, i.e. no error and/or partial
- * data. If there was an error, then we need to error
+ * data. If there was an error, then we need to error
* the entire range, even if some data was successfully
- * read. If there was a partial read we may supply some
+ * read. If there was a partial read we may supply some
* data and may error some as well. In all cases the
- * VM must receive some notification for every page in the
- * range.
+ * VM must receive some notification for every page
+ * in the range.
*/
if ((error == KERN_SUCCESS) && (residual == 0)) {
/*
* Got everything we asked for, supply the data
- * to the VM. Note that as a side effect of
- * supplying * the data, the buffer holding the
- * supplied data is * deallocated from the pager's
- * address space.
+ * to the VM. Note that as a side effect of
+ * supplying the data, the buffer holding the
+ * supplied data is deallocated from the pager's
+ * address space.
*/
pvs_object_data_provided(
vs, upl, vs_offset, xfer_size);
fill = residual
& ~vm_page_size;
- lsize = (xfer_size - residual)
+ lsize = (xfer_size - residual)
+ fill;
pvs_object_data_provided(
- vs, upl,
+ vs, upl,
vs_offset, lsize);
if (lsize < xfer_size) {
/*
* If there was an error in any part of the range, tell
* the VM. Note that error is explicitly checked again
- * since it can be modified above.
+ * since it can be modified above.
*/
if (error != KERN_SUCCESS) {
BS_STAT(psp[beg_pseg]->ps_bs,
psp[beg_pseg]->ps_bs->bs_pages_in_fail
- += atop(failed_size));
+ += atop_32(failed_size));
}
size -= xfer_size;
vs_offset += xfer_size;
upl_page_info_t *pl;
int page_index;
int list_size;
+ int pages_in_cl;
int cl_size;
+ int base_index;
+ int seg_size;
+
+ pages_in_cl = 1 << vs->vs_clshift;
+ cl_size = pages_in_cl * vm_page_size;
if (!dp_internal) {
int page_list_count;
int num_dirty;
int num_of_pages;
int seg_index;
- int pages_in_cl;
- int must_abort;
vm_offset_t upl_offset;
vm_offset_t seg_offset;
- vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
- paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+ vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
+ paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
- pages_in_cl = 1 << vs->vs_clshift;
- cl_size = pages_in_cl * vm_page_size;
-
if (bs_low) {
super_size = cl_size;
(memory_object_offset_t)offset,
cnt, super_size,
&upl, NULL, &page_list_count,
- request_flags | UPL_PAGEOUT);
+ request_flags | UPL_FOR_PAGEOUT);
pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
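+ /*
+ * seg_size is the number of bytes left in the cluster that
+ * contains the start of this UPL; upl_offset is the UPL's
+ * offset rounded down to a cluster boundary
+ */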
+ seg_size = cl_size - (upl->offset % cl_size);
+ upl_offset = upl->offset & ~(cl_size - 1);
+
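+ /*
+ * walk the clusters spanned by this UPL, looking up (and
+ * allocating, if necessary) the backing store for each and
+ * recording its paging segment and offset
+ */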
for (seg_index = 0, transfer_size = upl->size;
transfer_size > 0; ) {
-
ps_offset[seg_index] =
- ps_clmap(vs, upl->offset + (seg_index * cl_size),
- &clmap, CL_ALLOC,
- transfer_size < cl_size ?
- transfer_size : cl_size, 0);
+ ps_clmap(vs,
+ upl_offset,
+ &clmap, CL_ALLOC,
+ cl_size, 0);
if (ps_offset[seg_index] == (vm_offset_t) -1) {
upl_abort(upl, 0);
}
psp[seg_index] = CLMAP_PS(clmap);
- if (transfer_size > cl_size) {
- transfer_size -= cl_size;
+ if (transfer_size > seg_size) {
+ transfer_size -= seg_size;
+ upl_offset += cl_size;
+ seg_size = cl_size;
seg_index++;
} else
transfer_size = 0;
}
- for (page_index = 0,
- num_of_pages = upl->size / vm_page_size;
- page_index < num_of_pages; ) {
+ /*
+ * Ignore any non-present pages at the end of the
+ * UPL.
+ */
+ for (page_index = upl->size / vm_page_size; page_index > 0;)
+ if (UPL_PAGE_PRESENT(pl, --page_index))
+ break;
+ num_of_pages = page_index + 1;
+
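+ /*
+ * base_index is the page index, within its cluster, of the
+ * first page covered by this UPL
+ */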
+ base_index = (upl->offset % cl_size) / PAGE_SIZE;
+
+ for (page_index = 0; page_index < num_of_pages; ) {
/*
* skip over non-dirty pages
*/
for ( ; page_index < num_of_pages; page_index++) {
- if (UPL_DIRTY_PAGE(pl, page_index)
+ if (UPL_DIRTY_PAGE(pl, page_index)
|| UPL_PRECIOUS_PAGE(pl, page_index))
/*
* this is a page we need to write
- * go see if we can buddy it up with
+ * go see if we can buddy it up with
* others that are contiguous to it
*/
break;
/*
* if the page is not-dirty, but present we
- * need to commit it... This is an unusual
+ * need to commit it... This is an unusual
* case since we only asked for dirty pages
*/
if (UPL_PAGE_PRESENT(pl, page_index)) {
pl,
page_list_count,
&empty);
- if (empty)
+ if (empty) {
+ assert(page_index ==
+ num_of_pages - 1);
upl_deallocate(upl);
+ }
}
}
if (page_index == num_of_pages)
break;
/*
- * gather up contiguous dirty pages... we have at
- * least 1 otherwise we would have bailed above
+ * gather up contiguous dirty pages... we have at
+ * least 1, otherwise we would have bailed above
* make sure that each physical segment that we step
* into is contiguous to the one we're currently in
* if it's not, we have to stop and write what we have
*/
- for (first_dirty = page_index;
+ for (first_dirty = page_index;
page_index < num_of_pages; ) {
- if ( !UPL_DIRTY_PAGE(pl, page_index)
+ if ( !UPL_DIRTY_PAGE(pl, page_index)
&& !UPL_PRECIOUS_PAGE(pl, page_index))
break;
page_index++;
int cur_seg;
int nxt_seg;
- cur_seg =
- (page_index - 1) / pages_in_cl;
- nxt_seg = page_index / pages_in_cl;
+ cur_seg = (base_index + (page_index - 1))/pages_in_cl;
+ nxt_seg = (base_index + page_index)/pages_in_cl;
if (cur_seg != nxt_seg) {
if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
- /*
- * if the segment we're about
- * to step into is not
- * contiguous to the one we're
- * currently in, or it's in a
+ /*
+ * if the segment we're about
+ * to step into is not
+ * contiguous to the one we're
+ * currently in, or it's in a
* different paging file....
- * we stop here and generate
+ * we stop here and generate
* the I/O
*/
break;
}
}
num_dirty = page_index - first_dirty;
- must_abort = 1;
if (num_dirty) {
upl_offset = first_dirty * vm_page_size;
- seg_index = first_dirty / pages_in_cl;
- seg_offset = upl_offset - (seg_index * cl_size);
transfer_size = num_dirty * vm_page_size;
-
while (transfer_size) {
- int seg_size;
if ((seg_size = cl_size -
- (upl_offset % cl_size))
+ ((upl->offset + upl_offset) % cl_size))
> transfer_size)
seg_size = transfer_size;
}
upl_offset = first_dirty * vm_page_size;
transfer_size = num_dirty * vm_page_size;
+
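+ /*
+ * determine which cluster the first dirty page falls in and
+ * the byte offset of this run within that cluster
+ */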
+ seg_index = (base_index + first_dirty) / pages_in_cl;
+ seg_offset = (upl->offset + upl_offset) % cl_size;
+
error = ps_write_file(psp[seg_index],
upl, upl_offset,
ps_offset[seg_index]
+ seg_offset,
transfer_size, flags);
- must_abort = 0;
- }
- if (must_abort) {
+ } else {
boolean_t empty = FALSE;
upl_abort_range(upl,
first_dirty * vm_page_size,
num_dirty * vm_page_size,
UPL_ABORT_NOTIFY_EMPTY,
&empty);
- if (empty)
+ if (empty) {
+ assert(page_index == num_of_pages);
upl_deallocate(upl);
+ }
}
}
cnt, flags);
if (error)
break;
- }
+ }
if (error)
break;
actual_offset += cnt;
}
}
- return ptoa(num_pages);
+ return ptoa_32(num_pages);
}
size_t
struct vs_map *vsmap;
vm_offset_t cluster;
- cluster = atop(offset) >> vs->vs_clshift;
+ cluster = atop_32(offset) >> vs->vs_clshift;
if (vs->vs_indirect) {
long ind_block = cluster/CLMAP_ENTRIES;
int result;
- clustered_reads[atop(size)]++;
+ clustered_reads[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
int error = 0;
- clustered_writes[atop(size)]++;
+ clustered_writes[atop_32(size)]++;
f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
if (vnode_pageout(ps->ps_vnode,
return kr;
}
+
+/*
+ * Monitor the amount of available backing store vs. the amount of
+ * required backing store, and notify a listener (if present) when
+ * backing store may safely be removed.
+ *
+ * We attempt to avoid the situation where backing store is
+ * discarded en masse, as this can lead to thrashing as the
+ * backing store is compacted.
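+ *
+ * A single initial call to this function is sufficient; it
+ * reschedules itself with thread_call_func_delayed() every
+ * PF_INTERVAL seconds.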
+ */
+
+#define PF_INTERVAL 3 /* time between free level checks */
+#define PF_LATENCY 10 /* number of intervals before release */
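+/*
+ * with these values, free space must stay above the threshold for
+ * roughly PF_INTERVAL * PF_LATENCY (about 30) seconds before a
+ * release notification is sent
+ */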
+
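+/*
+ * number of consecutive checks during which dp_pages_free has
+ * exceeded maximum_pages_free
+ */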
+static int dp_pages_free_low_count = 0;
+
+void
+default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t p2)
+{
+ unsigned long long average;
+ ipc_port_t trigger;
+ uint64_t deadline;
+
+ /*
+ * We determine whether it will be safe to release some
+ * backing store by watching the free page level. If
+ * it remains above the maximum_pages_free threshold for
+ * at least PF_LATENCY checks (taken every PF_INTERVAL seconds),
+ * then we deem it safe.
+ *
+ * Note that this establishes a maximum rate at which backing
+ * store will be released, as each notification (currently)
+ * only results in a single backing store object being
+ * released.
+ */
+ if (dp_pages_free > maximum_pages_free) {
+ dp_pages_free_low_count++;
+ } else {
+ dp_pages_free_low_count = 0;
+ }
+
+ /* decide whether to send notification */
+ trigger = IP_NULL;
+ if (max_pages_trigger_port &&
+ (backing_store_release_trigger_disable == 0) &&
+ (dp_pages_free_low_count > PF_LATENCY)) {
+ trigger = max_pages_trigger_port;
+ max_pages_trigger_port = NULL;
+ }
+
+ /* send notification */
+ if (trigger != IP_NULL) {
+ VSL_LOCK();
+ if(backing_store_release_trigger_disable != 0) {
+ assert_wait((event_t)
+ &backing_store_release_trigger_disable,
+ THREAD_UNINT);
+ VSL_UNLOCK();
+ thread_block(THREAD_CONTINUE_NULL);
+ } else {
+ VSL_UNLOCK();
+ }
+ default_pager_space_alert(trigger, LO_WAT_ALERT);
+ ipc_port_release_send(trigger);
+ dp_pages_free_low_count = 0;
+ }
+
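+ /* check again in PF_INTERVAL seconds */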
+ clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
+ thread_call_func_delayed(default_pager_backing_store_monitor, NULL, deadline);
+}