X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fa4905b191e0d16b0fffd53bd565eca71d01fae0..ff6e181ae92fc6f1e89841290f461d1f2f9badd9:/osfmk/default_pager/dp_backing_store.c?ds=sidebyside diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index 34f601059..f46378f8b 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -1,22 +1,22 @@ - /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -54,19 +54,31 @@ * Paging File Management. */ +#include #include #include -#include "default_pager_internal.h" +#include +#include #include +#include + +#include #include #include + +#include +#include #include #include #include + #include #include -/* CDY CDY */ #include +#include +#include + +/* LP64todo - need large internal object support */ /* * ALLOC_STRIDE... the maximum number of bytes allocated from @@ -82,8 +94,8 @@ #define ALLOC_STRIDE (1024 * 1024 * 1024) int physical_transfer_cluster_count = 0; -#define VM_SUPER_CLUSTER 0x20000 -#define VM_SUPER_PAGES 32 +#define VM_SUPER_CLUSTER 0x40000 +#define VM_SUPER_PAGES 64 /* * 0 means no shift to pages, so == 1 page/cluster. 1 would mean @@ -129,8 +141,7 @@ void vs_free_async(struct vs_async *vsa); /* forward */ #define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock) #define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock) -#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, \ - ETAP_IO_DEV_PAGEH) +#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, 0) #define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock) /* * Paging Space Hysteresis triggers and the target notification port @@ -144,7 +155,12 @@ ipc_port_t max_pages_trigger_port = NULL; boolean_t bs_low = FALSE; int backing_store_release_trigger_disable = 0; - + + +/* Have we decided if swap needs to be encrypted yet ? 
*/ +boolean_t dp_encryption_inited = FALSE; +/* Should we encrypt swap ? */ +boolean_t dp_encryption = FALSE; /* @@ -173,12 +189,28 @@ int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 }; unsigned int dp_pages_free = 0; unsigned int cluster_transfer_minimum = 100; -kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int); /* forward */ -kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */ +/* forward declarations */ +kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int); /* forward */ +kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */ +default_pager_thread_t *get_read_buffer( void ); +kern_return_t ps_vstruct_transfer_from_segment( + vstruct_t vs, + paging_segment_t segment, + upl_t upl); +kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ +kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ +kern_return_t vs_cluster_transfer( + vstruct_t vs, + upl_offset_t offset, + upl_size_t cnt, + upl_t upl); +vs_map_t vs_get_map_entry( + vstruct_t vs, + vm_offset_t offset); default_pager_thread_t * -get_read_buffer() +get_read_buffer( void ) { int i; @@ -191,9 +223,7 @@ get_read_buffer() return dpt_array[i]; } } - assert_wait(&dpt_array, THREAD_UNINT); - DPT_UNLOCK(dpt_lock); - thread_block((void(*)(void))0); + DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT); } } @@ -334,9 +364,9 @@ bs_global_info( */ pages_total += ps->ps_pgnum; pages_free += ps->ps_clcount << ps->ps_clshift; - DEBUG(DEBUG_BS_INTERNAL, - ("segment #%d: %d total, %d free\n", - i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("segment #%d: %d total, %d free\n", + i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift)); } *totalp = pages_total; *freep = pages_free; @@ -415,10 +445,10 @@ void backing_store_add(backing_store_t); /* forward */ void backing_store_add( - backing_store_t bs) + __unused backing_store_t bs) { - MACH_PORT_FACE port = bs->bs_port; - MACH_PORT_FACE pset = default_pager_default_set; +// MACH_PORT_FACE port = bs->bs_port; +// MACH_PORT_FACE pset = default_pager_default_set; kern_return_t kr = KERN_SUCCESS; if (kr != KERN_SUCCESS) @@ -515,7 +545,7 @@ default_pager_backing_store_create( { backing_store_t bs; MACH_PORT_FACE port; - kern_return_t kr; +// kern_return_t kr; struct vstruct_alias *alias_struct; if (pager != default_pager_object) @@ -526,9 +556,9 @@ default_pager_backing_store_create( ipc_port_make_send(port); assert (port != IP_NULL); - DEBUG(DEBUG_BS_EXTERNAL, - ("priority=%d clsize=%d bs_port=0x%x\n", - priority, clsize, (int) backing_store)); + DP_DEBUG(DEBUG_BS_EXTERNAL, + ("priority=%d clsize=%d bs_port=0x%x\n", + priority, clsize, (int) backing_store)); alias_struct = (struct vstruct_alias *) kalloc(sizeof (struct vstruct_alias)); @@ -539,7 +569,7 @@ default_pager_backing_store_create( } else { ipc_port_dealloc_kernel((MACH_PORT_FACE)(port)); - kfree((vm_offset_t)bs, sizeof (struct backing_store)); + kfree(bs, sizeof (struct backing_store)); return KERN_RESOURCE_SHORTAGE; } @@ -552,7 +582,7 @@ default_pager_backing_store_create( priority = BS_MINPRI; bs->bs_priority = priority; - bs->bs_clsize = bs_get_global_clsize(atop(clsize)); + bs->bs_clsize = bs_get_global_clsize(atop_32(clsize)); 
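	/*
	 * (Illustrative sketch, not part of the commit: bs_clsize and
	 * vs_clshift are in units of pages, and a cluster is
	 * 1 << clshift pages.  With the usual 4K page, VM_SUPER_CLUSTER
	 * (0x40000 above) is exactly VM_SUPER_PAGES (64) pages.  The
	 * atop_32()/ptoa_32() calls this diff switches to are the
	 * 32-bit byte/page conversions:)
	 */
#if 0	/* sketch only; the clshift value is hypothetical */
	unsigned int clshift = 2;				/* 4 pages per cluster */
	unsigned int pages_per_cluster = 1 << clshift;
	unsigned int cluster_bytes = ptoa_32(pages_per_cluster);	/* 4 * 4096 == 16K */
	assert(atop_32(cluster_bytes) == pages_per_cluster);
#endif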
BSL_LOCK(); queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t, @@ -616,7 +646,7 @@ default_pager_backing_store_info( basic->bs_pages_out_fail= bs->bs_pages_out_fail; basic->bs_priority = bs->bs_priority; - basic->bs_clsize = ptoa(bs->bs_clsize); /* in bytes */ + basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */ BS_UNLOCK(bs); @@ -650,12 +680,7 @@ ps_delete( while(backing_store_release_trigger_disable != 0) { - assert_wait((event_t) - &backing_store_release_trigger_disable, - THREAD_UNINT); - VSL_UNLOCK(); - thread_block((void (*)(void)) 0); - VSL_LOCK(); + VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT); } /* we will choose instead to hold a send right */ @@ -693,7 +718,7 @@ ps_delete( int count; upl_t upl; - transfer_object = vm_object_allocate(VM_SUPER_CLUSTER); + transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER); count = 0; error = vm_object_upl_request(transfer_object, (vm_object_offset_t)0, VM_SUPER_CLUSTER, @@ -701,22 +726,14 @@ ps_delete( UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL); if(error == KERN_SUCCESS) { -#ifndef ubc_sync_working - upl_commit(upl, NULL); - upl_deallocate(upl); - error = ps_vstruct_transfer_from_segment( - vs, ps, transfer_object); -#else error = ps_vstruct_transfer_from_segment( vs, ps, upl); - upl_commit(upl, NULL); + upl_commit(upl, NULL, 0); upl_deallocate(upl); -#endif - vm_object_deallocate(transfer_object); } else { - vm_object_deallocate(transfer_object); error = KERN_FAILURE; } + vm_object_deallocate(transfer_object); } if(error) { VS_LOCK(vs); @@ -734,12 +751,8 @@ ps_delete( VSL_LOCK(); while(backing_store_release_trigger_disable != 0) { - assert_wait((event_t) - &backing_store_release_trigger_disable, - THREAD_UNINT); - VSL_UNLOCK(); - thread_block((void (*)(void)) 0); - VSL_LOCK(); + VSL_SLEEP(&backing_store_release_trigger_disable, + THREAD_UNINT); } next_vs = (vstruct_t) queue_next(&(vs->vs_links)); @@ -781,7 +794,7 @@ default_pager_backing_store_delete( paging_segment_t ps; int error; int interim_pages_removed = 0; - kern_return_t kr; +// kern_return_t kr; if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL) return KERN_INVALID_ARGUMENT; @@ -864,9 +877,8 @@ default_pager_backing_store_delete( paging_segments[i] = PAGING_SEGMENT_NULL; paging_segment_count--; PS_LOCK(ps); - kfree((vm_offset_t)ps->ps_bmap, - RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); } } } @@ -889,8 +901,8 @@ default_pager_backing_store_delete( * Disable lookups of this backing store. */ if((void *)bs->bs_port->alias != NULL) - kfree((vm_offset_t) bs->bs_port->alias, - sizeof (struct vstruct_alias)); + kfree((void *) bs->bs_port->alias, + sizeof (struct vstruct_alias)); ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port)); bs->bs_port = MACH_PORT_NULL; BS_UNLOCK(bs); @@ -906,7 +918,7 @@ default_pager_backing_store_delete( /* * Free the backing store structure. 
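 * (Illustrative note: the kfree() calls below and throughout this
 * diff lose their (vm_offset_t) casts because kfree() takes a
 * void * pointer here, as the (void *) cast on bs_port->alias
 * above shows; the size argument is unchanged.)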
*/ - kfree((vm_offset_t)bs, sizeof *bs); + kfree(bs, sizeof *bs); return KERN_SUCCESS; } @@ -1005,7 +1017,7 @@ default_pager_add_segment( PS_LOCK_INIT(ps); ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); if (!ps->ps_bmap) { - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -1017,8 +1029,8 @@ default_pager_add_segment( ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { - kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -1033,10 +1045,10 @@ default_pager_add_segment( bs_more_space(ps->ps_clcount); - DEBUG(DEBUG_BS_INTERNAL, - ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", - device, offset, count, record_size, - ps->ps_record_shift, ps->ps_pgnum)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", + device, offset, count, record_size, + ps->ps_record_shift, ps->ps_pgnum)); return KERN_SUCCESS; } @@ -1092,7 +1104,7 @@ vs_alloc_async(void) { struct vs_async *vsa; MACH_PORT_FACE reply_port; - kern_return_t kr; +// kern_return_t kr; VS_ASYNC_LOCK(); if (vs_async_free_list == NULL) { @@ -1119,8 +1131,7 @@ vs_alloc_async(void) vs_alloc_async_failed++; ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); - kfree((vm_offset_t)vsa, - sizeof (struct vs_async)); + kfree(vsa, sizeof (struct vs_async)); vsa = NULL; } } @@ -1172,8 +1183,7 @@ vs_alloc_async(void) vs_alloc_async_failed++; ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); - kfree((vm_offset_t) vsa, - sizeof (struct vs_async)); + kfree(vsa, sizeof (struct vs_async)); vsa = NULL; } } @@ -1189,8 +1199,8 @@ vs_free_async( kern_return_t kr; reply_port = vsa->reply_port; - kfree((vm_offset_t) reply_port->alias, sizeof (struct vstuct_alias)); - kfree((vm_offset_t) vsa, sizeof (struct vs_async)); + kfree(reply_port->alias, sizeof (struct vstuct_alias)); + kfree(vsa, sizeof (struct vs_async)); ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); #if 0 VS_ASYNC_LOCK(); @@ -1208,7 +1218,7 @@ ps_vstruct_create( vm_size_t size) { vstruct_t vs; - int i; + unsigned int i; vs = (vstruct_t) zalloc(vstruct_zone); if (vs == VSTRUCT_NULL) { @@ -1231,11 +1241,11 @@ ps_vstruct_create( vs->vs_waiting_write = FALSE; vs->vs_waiting_async = FALSE; #else - mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO); - mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD); - mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE); - mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS); - mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC); + mutex_init(&vs->vs_waiting_seqno, 0); + mutex_init(&vs->vs_waiting_read, 0); + mutex_init(&vs->vs_waiting_write, 0); + mutex_init(&vs->vs_waiting_refs, 0); + mutex_init(&vs->vs_waiting_async, 0); #endif vs->vs_readers = 0; @@ -1244,7 +1254,7 @@ ps_vstruct_create( vs->vs_errors = 0; vs->vs_clshift = local_log2(bs_get_global_clsize(0)); - vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1; + vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1; vs->vs_async_pending = 0; /* @@ -1261,14 +1271,14 @@ ps_vstruct_create( vs->vs_indirect = FALSE; } vs->vs_xfer_pending = FALSE; - DEBUG(DEBUG_VS_INTERNAL, - ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, 
vs->vs_indirect)); /* * Check to see that we got the space. */ if (!vs->vs_dmap) { - kfree((vm_offset_t)vs, sizeof *vs); + kfree(vs, sizeof *vs); return VSTRUCT_NULL; } @@ -1289,12 +1299,12 @@ ps_vstruct_create( return vs; } -paging_segment_t ps_select_segment(int, int *); /* forward */ +paging_segment_t ps_select_segment(unsigned int, int *); /* forward */ paging_segment_t ps_select_segment( - int shift, - int *psindex) + unsigned int shift, + int *psindex) { paging_segment_t ps; int i; @@ -1437,7 +1447,7 @@ ps_allocate_cluster( int *psindex, paging_segment_t use_ps) { - int byte_num; + unsigned int byte_num; int bit_num = 0; paging_segment_t ps; vm_offset_t cluster; @@ -1470,6 +1480,9 @@ ps_allocate_cluster( ps = use_ps; PSL_LOCK(); PS_LOCK(ps); + + ASSERT(ps->ps_clcount != 0); + ps->ps_clcount--; dp_pages_free -= 1 << ps->ps_clshift; if(min_pages_trigger_port && @@ -1486,14 +1499,21 @@ ps_allocate_cluster( } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) == PAGING_SEGMENT_NULL) { -#if 0 - bs_no_paging_space(TRUE); -#endif -#if 0 - if (verbose) -#endif - dprintf(("no space in available paging segments; " - "swapon suggested\n")); + static uint32_t lastnotify = 0; + uint32_t now, nanoseconds_dummy; + + /* + * Emit a notification of the low-paging resource condition + * but don't issue it more than once every five seconds. This + * prevents us from overflowing logs with thousands of + * repetitions of the message. + */ + clock_get_system_nanotime(&now, &nanoseconds_dummy); + if (now > lastnotify + 5) { + dprintf(("no space in available paging segments\n")); + lastnotify = now; + } + /* the count got off maybe, reset to zero */ PSL_LOCK(); dp_pages_free = 0; @@ -1509,7 +1529,6 @@ ps_allocate_cluster( } return (vm_offset_t) -1; } - ASSERT(ps->ps_clcount != 0); /* * Look for an available cluster. 
At the end of the loop, @@ -1547,7 +1566,6 @@ ps_deallocate_cluster( paging_segment_t ps, vm_offset_t cluster) { - ipc_port_t trigger = IP_NULL; if (cluster >= (vm_offset_t) ps->ps_ncls) panic("ps_deallocate_cluster: Invalid cluster number"); @@ -1561,12 +1579,6 @@ ps_deallocate_cluster( clrbit(ps->ps_bmap, cluster); ++ps->ps_clcount; dp_pages_free += 1 << ps->ps_clshift; - if(max_pages_trigger_port - && (backing_store_release_trigger_disable == 0) - && (dp_pages_free > maximum_pages_free)) { - trigger = max_pages_trigger_port; - max_pages_trigger_port = NULL; - } PSL_UNLOCK(); /* @@ -1587,21 +1599,6 @@ ps_deallocate_cluster( ps_select_array[ps->ps_bs->bs_priority] = 0; PSL_UNLOCK(); - if (trigger != IP_NULL) { - VSL_LOCK(); - if(backing_store_release_trigger_disable != 0) { - assert_wait((event_t) - &backing_store_release_trigger_disable, - THREAD_UNINT); - VSL_UNLOCK(); - thread_block((void (*)(void)) 0); - } else { - VSL_UNLOCK(); - } - default_pager_space_alert(trigger, LO_WAT_ALERT); - ipc_port_release_send(trigger); - } - return; } @@ -1612,7 +1609,7 @@ ps_dealloc_vsmap( struct vs_map *vsmap, vm_size_t size) { - int i; + unsigned int i; for (i = 0; i < size; i++) if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) ps_deallocate_cluster(VSM_PS(vsmap[i]), @@ -1623,8 +1620,8 @@ void ps_vstruct_dealloc( vstruct_t vs) { - int i; - spl_t s; + unsigned int i; +// spl_t s; VS_MAP_LOCK(vs); @@ -1641,31 +1638,29 @@ ps_vstruct_dealloc( for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { if (vs->vs_imap[i] != NULL) { ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES); - kfree((vm_offset_t)vs->vs_imap[i], - CLMAP_THRESHOLD); + kfree(vs->vs_imap[i], CLMAP_THRESHOLD); } } - kfree((vm_offset_t)vs->vs_imap, - INDIRECT_CLMAP_SIZE(vs->vs_size)); + kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size)); } else { /* * Direct map. Free used clusters, then memory. */ ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size); - kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); + kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); } VS_MAP_UNLOCK(vs); bs_commit(- vs->vs_size); - zfree(vstruct_zone, (vm_offset_t)vs); + zfree(vstruct_zone, vs); } -int ps_map_extend(vstruct_t, int); /* forward */ +int ps_map_extend(vstruct_t, unsigned int); /* forward */ int ps_map_extend( vstruct_t vs, - int new_size) + unsigned int new_size) { struct vs_map **new_imap; struct vs_map *new_dmap = NULL; @@ -1727,7 +1722,7 @@ int ps_map_extend( /* Allocate an indirect page */ if ((new_imap[0] = (struct vs_map *) kalloc(CLMAP_THRESHOLD)) == NULL) { - kfree((vm_offset_t)new_imap, new_map_size); + kfree(new_imap, new_map_size); return -1; } new_dmap = new_imap[0]; @@ -1768,7 +1763,7 @@ int ps_map_extend( bs_commit(new_size - vs->vs_size); vs->vs_size = new_size; if (old_map) - kfree((vm_offset_t)old_map, old_map_size); + kfree(old_map, old_map_size); return 0; } @@ -1784,13 +1779,13 @@ ps_clmap( vm_offset_t cluster; /* The cluster of offset. */ vm_offset_t newcl; /* The new cluster allocated. */ vm_offset_t newoff; - int i; + unsigned int i; struct vs_map *vsmap; VS_MAP_LOCK(vs); ASSERT(vs->vs_dmap); - cluster = atop(offset) >> vs->vs_clshift; + cluster = atop_32(offset) >> vs->vs_clshift; /* * Initialize cluster error value @@ -1882,7 +1877,7 @@ ps_clmap( */ newcl = ps_allocate_cluster(vs, &psindex, PAGING_SEGMENT_NULL); - if (newcl == -1) { + if (newcl == (vm_offset_t) -1) { VS_MAP_UNLOCK(vs); return (vm_offset_t) -1; } @@ -1906,14 +1901,14 @@ ps_clmap( * relatively quick. 
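 * (Illustrative arithmetic: ptoa_32() below turns the cluster index
 * into a byte offset.  With 4K pages and a cluster shift of 2,
 * cluster 5 becomes (5 << 12) << 2 == 0x14000, i.e. 5 * 16K, and
 * newoff then picks out the page within that 16K cluster.)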
*/ ASSERT(trunc_page(offset) == offset); - newcl = ptoa(newcl) << vs->vs_clshift; + newcl = ptoa_32(newcl) << vs->vs_clshift; newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1); if (flag == CL_ALLOC) { /* * set bits in the allocation bitmap according to which * pages were requested. size is in bytes. */ - i = atop(newoff); + i = atop_32(newoff); while ((size > 0) && (i < VSCLSIZE(vs))) { VSM_SETALLOC(*vsmap, i); i++; @@ -1926,7 +1921,7 @@ ps_clmap( * Offset is not cluster aligned, so number of pages * and bitmaps must be adjusted */ - clmap->cl_numpages -= atop(newoff); + clmap->cl_numpages -= atop_32(newoff); CLMAP_SHIFT(clmap, vs); CLMAP_SHIFTALLOC(clmap, vs); } @@ -1955,7 +1950,7 @@ ps_clmap( } else { BS_STAT(clmap->cl_ps->ps_bs, clmap->cl_ps->ps_bs->bs_pages_out_fail += - atop(size)); + atop_32(size)); off = VSM_CLOFF(*vsmap); VSM_SETERR(*vsmap, error); } @@ -1970,13 +1965,13 @@ ps_clmap( } else VS_MAP_UNLOCK(vs); - DEBUG(DEBUG_VS_INTERNAL, - ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n", - newcl+newoff, (int) vs, (int) vsmap, flag)); - DEBUG(DEBUG_VS_INTERNAL, - (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n", - (int) clmap->cl_ps, clmap->cl_numpages, - (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n", + newcl+newoff, (int) vs, (int) vsmap, flag)); + DP_DEBUG(DEBUG_VS_INTERNAL, + (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n", + (int) clmap->cl_ps, clmap->cl_numpages, + (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map)); return (newcl + newoff); } @@ -2000,9 +1995,9 @@ ps_clunmap( */ while (length > 0) { vm_offset_t newoff; - int i; + unsigned int i; - cluster = atop(offset) >> vs->vs_clshift; + cluster = atop_32(offset) >> vs->vs_clshift; if (vs->vs_indirect) /* indirect map */ vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES]; else @@ -2022,12 +2017,12 @@ ps_clunmap( * paging segment cluster pages. * Optimize for entire cluster cleraing. */ - if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) { + if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) { /* * Not cluster aligned. */ ASSERT(trunc_page(newoff) == newoff); - i = atop(newoff); + i = atop_32(newoff); } else i = 0; while ((i < VSCLSIZE(vs)) && (length > 0)) { @@ -2079,15 +2074,15 @@ void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, void vs_cl_write_complete( - vstruct_t vs, - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t addr, - vm_size_t size, - boolean_t async, - int error) + vstruct_t vs, + __unused paging_segment_t ps, + vm_offset_t offset, + __unused vm_offset_t addr, + vm_size_t size, + boolean_t async, + int error) { - kern_return_t kr; +// kern_return_t kr; if (error) { /* @@ -2098,7 +2093,7 @@ vs_cl_write_complete( dprintf(("write failed error = 0x%x\n", error)); /* add upl_abort code here */ } else - GSTAT(global_stats.gs_pages_out += atop(size)); + GSTAT(global_stats.gs_pages_out += atop_32(size)); /* * Notify the vstruct mapping code, so it can do its accounting. */ @@ -2146,11 +2141,7 @@ device_write_reply( if(vsa->vsa_error) { /* need to consider error condition. re-write data or */ /* throw it away here. 
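 * (Illustrative note: the replacement below frees the undelivered
 * vm_map_copy_t directly with vm_map_copy_discard() instead of
 * mapping it into kernel_map with vm_map_copyout() just so it could
 * be vm_deallocate()d again.)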
*/ - vm_offset_t ioaddr; - if(vm_map_copyout(kernel_map, &ioaddr, - (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS) - panic("vs_cluster_write: unable to copy source list\n"); - vm_deallocate(kernel_map, ioaddr, vsa->vsa_size); + vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr); } ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset, vsa->vsa_size, vsa->vsa_error); @@ -2227,8 +2218,6 @@ device_open_reply( return KERN_SUCCESS; } -kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ - kern_return_t ps_read_device( paging_segment_t ps, @@ -2254,7 +2243,7 @@ ps_read_device( default_pager_thread_t *dpt = NULL; device = dev_port_lookup(ps->ps_device); - clustered_reads[atop(size)]++; + clustered_reads[atop_32(size)]++; dev_offset = (ps->ps_offset + (offset >> (vm_page_shift - ps->ps_record_shift))); @@ -2271,7 +2260,7 @@ ps_read_device( vsa->vsa_size = 0; vsa->vsa_ps = NULL; } - mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO); + mutex_init(&vsa->vsa_lock, 0); ip_lock(vsa->reply_port); vsa->reply_port->ip_sorights++; ip_reference(vsa->reply_port); @@ -2288,7 +2277,7 @@ ps_read_device( (mach_msg_type_number_t *) &bytes_read); if(kr == MIG_NO_REPLY) { assert_wait(&vsa->vsa_lock, THREAD_UNINT); - thread_block((void(*)(void))0); + thread_block(THREAD_CONTINUE_NULL); dev_buffer = vsa->vsa_addr; bytes_read = (unsigned int)vsa->vsa_size; @@ -2327,9 +2316,9 @@ ps_read_device( records_read = (bytes_read >> (vm_page_shift - ps->ps_record_shift)); dev_offset += records_read; - DEBUG(DEBUG_VS_INTERNAL, - ("calling vm_deallocate(addr=0x%X,size=0x%X)\n", - dev_buffer, bytes_read)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("calling vm_deallocate(addr=0x%X,size=0x%X)\n", + dev_buffer, bytes_read)); if (vm_deallocate(kernel_map, dev_buffer, bytes_read) != KERN_SUCCESS) Panic("dealloc buf"); @@ -2338,7 +2327,7 @@ ps_read_device( *residualp = size - total_read; if((dev_buffer != *bufferp) && (total_read != 0)) { vm_offset_t temp_buffer; - vm_allocate(kernel_map, &temp_buffer, total_read, TRUE); + vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE); memcpy((void *) temp_buffer, (void *) *bufferp, total_read); if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read, VM_MAP_COPYIN_OPT_SRC_DESTROY | @@ -2368,8 +2357,6 @@ ps_read_device( return KERN_SUCCESS; } -kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ - kern_return_t ps_write_device( paging_segment_t ps, @@ -2386,7 +2373,7 @@ ps_write_device( - clustered_writes[atop(size)]++; + clustered_writes[atop_32(size)]++; dev_offset = (ps->ps_offset + (offset >> (vm_page_shift - ps->ps_record_shift))); @@ -2423,7 +2410,7 @@ ps_write_device( "device_write_request returned ", kr, addr, size, offset)); BS_STAT(ps->ps_bs, - ps->ps_bs->bs_pages_out_fail += atop(size)); + ps->ps_bs->bs_pages_out_fail += atop_32(size)); /* do the completion notification to free resources */ device_write_reply(reply_port, kr, 0); return PAGER_ERROR; @@ -2449,7 +2436,7 @@ ps_write_device( "device_write returned ", kr, addr, size, offset)); BS_STAT(ps->ps_bs, - ps->ps_bs->bs_pages_out_fail += atop(size)); + ps->ps_bs->bs_pages_out_fail += atop_32(size)); return PAGER_ERROR; } if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1)) @@ -2475,43 +2462,44 @@ ps_write_device( kern_return_t ps_read_device( - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t *bufferp, - unsigned int size, - unsigned int *residualp, - int flags) + 
__unused paging_segment_t ps, + __unused vm_offset_t offset, + __unused vm_offset_t *bufferp, + __unused unsigned int size, + __unused unsigned int *residualp, + __unused int flags) { panic("ps_read_device not supported"); } +kern_return_t ps_write_device( - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t addr, - unsigned int size, - struct vs_async *vsa) + __unused paging_segment_t ps, + __unused vm_offset_t offset, + __unused vm_offset_t addr, + __unused unsigned int size, + __unused struct vs_async *vsa) { panic("ps_write_device not supported"); } #endif /* DEVICE_PAGING */ -void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t); /* forward */ +void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */ void pvs_object_data_provided( - vstruct_t vs, - upl_t upl, - vm_offset_t offset, - vm_size_t size) + __unused vstruct_t vs, + __unused upl_t upl, + __unused upl_offset_t offset, + upl_size_t size) { - DEBUG(DEBUG_VS_INTERNAL, - ("buffer=0x%x,offset=0x%x,size=0x%x\n", - upl, offset, size)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("buffer=0x%x,offset=0x%x,size=0x%x\n", + upl, offset, size)); ASSERT(size > 0); - GSTAT(global_stats.gs_pages_in += atop(size)); + GSTAT(global_stats.gs_pages_in += atop_32(size)); #if USE_PRECIOUS @@ -2528,15 +2516,15 @@ pvs_cluster_read( { upl_t upl; kern_return_t error = KERN_SUCCESS; - int size; - unsigned int residual; + int size; + int residual; unsigned int request_flags; - int seg_index; - int pages_in_cl; + int seg_index; + int pages_in_cl; int cl_size; int cl_mask; - int cl_index; - int xfer_size; + int cl_index; + int xfer_size; vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; struct clmap clmap; @@ -2561,18 +2549,35 @@ pvs_cluster_read( */ #if USE_PRECIOUS - request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS; + request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT; #else - request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE ; + request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT; #endif + + assert(dp_encryption_inited); + if (dp_encryption) { + /* + * ENCRYPTED SWAP: + * request that the UPL be prepared for + * decryption. + */ + request_flags |= UPL_ENCRYPT; + } + while (cnt && (error == KERN_SUCCESS)) { int ps_info_valid; int page_list_count; - if (cnt > VM_SUPER_CLUSTER) + if((vs_offset & cl_mask) && + (cnt > (VM_SUPER_CLUSTER - + (vs_offset & cl_mask)))) { + size = VM_SUPER_CLUSTER; + size -= vs_offset & cl_mask; + } else if (cnt > VM_SUPER_CLUSTER) { size = VM_SUPER_CLUSTER; - else + } else { size = cnt; + } cnt -= size; ps_info_valid = 0; @@ -2634,7 +2639,7 @@ pvs_cluster_read( /* * Let VM system know about holes in clusters. 
*/ - GSTAT(global_stats.gs_pages_unavail += atop(abort_size)); + GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size)); page_list_count = 0; memory_object_super_upl_request( @@ -2677,9 +2682,11 @@ pvs_cluster_read( */ for (xfer_size = 0; xfer_size < size; ) { - while (cl_index < pages_in_cl && xfer_size < size) { + while (cl_index < pages_in_cl + && xfer_size < size) { /* - * accumulate allocated pages within a physical segment + * accumulate allocated pages within + * a physical segment */ if (CLMAP_ISSET(clmap, cl_index)) { xfer_size += vm_page_size; @@ -2691,35 +2698,43 @@ pvs_cluster_read( } else break; } - if (cl_index < pages_in_cl || xfer_size >= size) { + if (cl_index < pages_in_cl + || xfer_size >= size) { /* - * we've hit an unallocated page or the - * end of this request... go fire the I/O + * we've hit an unallocated page or + * the end of this request... go fire + * the I/O */ break; } /* - * we've hit the end of the current physical segment - * and there's more to do, so try moving to the next one + * we've hit the end of the current physical + * segment and there's more to do, so try + * moving to the next one */ seg_index++; - ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0); - psp[seg_index] = CLMAP_PS(clmap); + ps_offset[seg_index] = + ps_clmap(vs, + cur_offset & ~cl_mask, + &clmap, CL_FIND, 0, 0); + psp[seg_index] = CLMAP_PS(clmap); ps_info_valid = 1; if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) { /* - * if the physical segment we're about to step into - * is not contiguous to the one we're currently - * in, or it's in a different paging file, or + * if the physical segment we're about + * to step into is not contiguous to + * the one we're currently in, or it's + * in a different paging file, or * it hasn't been allocated.... * we stop here and generate the I/O */ break; } /* - * start with first page of the next physical segment + * start with first page of the next physical + * segment */ cl_index = 0; } @@ -2730,68 +2745,78 @@ pvs_cluster_read( */ page_list_count = 0; memory_object_super_upl_request(vs->vs_control, - (memory_object_offset_t)vs_offset, - xfer_size, xfer_size, - &upl, NULL, &page_list_count, - request_flags | UPL_SET_INTERNAL); + (memory_object_offset_t)vs_offset, + xfer_size, xfer_size, + &upl, NULL, &page_list_count, + request_flags | UPL_SET_INTERNAL); - error = ps_read_file(psp[beg_pseg], upl, (vm_offset_t) 0, - ps_offset[beg_pseg] + (beg_indx * vm_page_size), xfer_size, &residual, 0); + error = ps_read_file(psp[beg_pseg], + upl, (upl_offset_t) 0, + ps_offset[beg_pseg] + + (beg_indx * vm_page_size), + xfer_size, &residual, 0); } else continue; failed_size = 0; /* - * Adjust counts and send response to VM. Optimize for the - * common case, i.e. no error and/or partial data. - * If there was an error, then we need to error the entire - * range, even if some data was successfully read. - * If there was a partial read we may supply some + * Adjust counts and send response to VM. Optimize + * for the common case, i.e. no error and/or partial + * data. If there was an error, then we need to error + * the entire range, even if some data was successfully + * read. If there was a partial read we may supply some * data and may error some as well. In all cases the - * VM must receive some notification for every page in the - * range. + * VM must receive some notification for every page + * in the range. 
*/ if ((error == KERN_SUCCESS) && (residual == 0)) { /* - * Got everything we asked for, supply the data to - * the VM. Note that as a side effect of supplying - * the data, the buffer holding the supplied data is - * deallocated from the pager's address space. + * Got everything we asked for, supply the data + * to the VM. Note that as a side effect of + * supplying the data, the buffer holding the + * supplied data is deallocated from the pager's + * address space. */ - pvs_object_data_provided(vs, upl, vs_offset, xfer_size); + pvs_object_data_provided( + vs, upl, vs_offset, xfer_size); } else { failed_size = xfer_size; if (error == KERN_SUCCESS) { if (residual == xfer_size) { - /* - * If a read operation returns no error - * and no data moved, we turn it into - * an error, assuming we're reading at - * or beyong EOF. - * Fall through and error the entire - * range. - */ + /* + * If a read operation returns no error + * and no data moved, we turn it into + * an error, assuming we're reading at + * or beyong EOF. + * Fall through and error the entire + * range. + */ error = KERN_FAILURE; } else { - /* - * Otherwise, we have partial read. If - * the part read is a integral number - * of pages supply it. Otherwise round - * it up to a page boundary, zero fill - * the unread part, and supply it. - * Fall through and error the remainder - * of the range, if any. - */ + /* + * Otherwise, we have partial read. If + * the part read is a integral number + * of pages supply it. Otherwise round + * it up to a page boundary, zero fill + * the unread part, and supply it. + * Fall through and error the remainder + * of the range, if any. + */ int fill, lsize; - fill = residual & ~vm_page_size; - lsize = (xfer_size - residual) + fill; - pvs_object_data_provided(vs, upl, vs_offset, lsize); + fill = residual + & ~vm_page_size; + lsize = (xfer_size - residual) + + fill; + pvs_object_data_provided( + vs, upl, + vs_offset, lsize); if (lsize < xfer_size) { - failed_size = xfer_size - lsize; + failed_size = + xfer_size - lsize; error = KERN_FAILURE; } } @@ -2799,12 +2824,13 @@ pvs_cluster_read( } /* * If there was an error in any part of the range, tell - * the VM. Note that error is explicitly checked again since - * it can be modified above. + * the VM. Note that error is explicitly checked again + * since it can be modified above. 
*/ if (error != KERN_SUCCESS) { BS_STAT(psp[beg_pseg]->ps_bs, - psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop(failed_size)); + psp[beg_pseg]->ps_bs->bs_pages_in_fail + += atop_32(failed_size)); } size -= xfer_size; vs_offset += xfer_size; @@ -2820,50 +2846,45 @@ kern_return_t vs_cluster_write( vstruct_t vs, upl_t internal_upl, - vm_offset_t offset, - vm_size_t cnt, + upl_offset_t offset, + upl_size_t cnt, boolean_t dp_internal, int flags) { - vm_offset_t size; - vm_offset_t transfer_size; + upl_size_t transfer_size; int error = 0; struct clmap clmap; vm_offset_t actual_offset; /* Offset within paging segment */ paging_segment_t ps; - vm_offset_t subx_size; vm_offset_t mobj_base_addr; vm_offset_t mobj_target_addr; - int mobj_size; - - struct vs_async *vsa; - vm_map_copy_t copy; upl_t upl; upl_page_info_t *pl; int page_index; int list_size; - int cl_size; + int pages_in_cl; + unsigned int cl_size; + int base_index; + unsigned int seg_size; + + pages_in_cl = 1 << vs->vs_clshift; + cl_size = pages_in_cl * vm_page_size; if (!dp_internal) { int page_list_count; int request_flags; - int super_size; + unsigned int super_size; int first_dirty; int num_dirty; int num_of_pages; int seg_index; - int pages_in_cl; - int must_abort; - vm_offset_t upl_offset; + upl_offset_t upl_offset; vm_offset_t seg_offset; - vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; - paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; - + vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; + paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; - pages_in_cl = 1 << vs->vs_clshift; - cl_size = pages_in_cl * vm_page_size; if (bs_low) { super_size = cl_size; @@ -2879,21 +2900,45 @@ vs_cluster_write( UPL_NO_SYNC | UPL_SET_INTERNAL; } + if (!dp_encryption_inited) { + /* + * ENCRYPTED SWAP: + * Once we've started using swap, we + * can't change our mind on whether + * it needs to be encrypted or + * not. + */ + dp_encryption_inited = TRUE; + } + if (dp_encryption) { + /* + * ENCRYPTED SWAP: + * request that the UPL be prepared for + * encryption. + */ + request_flags |= UPL_ENCRYPT; + flags |= UPL_PAGING_ENCRYPTED; + } + page_list_count = 0; memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)offset, cnt, super_size, &upl, NULL, &page_list_count, - request_flags | UPL_PAGEOUT); + request_flags | UPL_FOR_PAGEOUT); pl = UPL_GET_INTERNAL_PAGE_LIST(upl); - for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) { + seg_size = cl_size - (upl->offset % cl_size); + upl_offset = upl->offset & ~(cl_size - 1); - ps_offset[seg_index] = ps_clmap(vs, upl->offset + (seg_index * cl_size), - &clmap, CL_ALLOC, - transfer_size < cl_size ? - transfer_size : cl_size, 0); + for (seg_index = 0, transfer_size = upl->size; + transfer_size > 0; ) { + ps_offset[seg_index] = + ps_clmap(vs, + upl_offset, + &clmap, CL_ALLOC, + cl_size, 0); if (ps_offset[seg_index] == (vm_offset_t) -1) { upl_abort(upl, 0); @@ -2904,27 +2949,42 @@ vs_cluster_write( } psp[seg_index] = CLMAP_PS(clmap); - if (transfer_size > cl_size) { - transfer_size -= cl_size; + if (transfer_size > seg_size) { + transfer_size -= seg_size; + upl_offset += cl_size; + seg_size = cl_size; seg_index++; } else transfer_size = 0; } - for (page_index = 0, num_of_pages = upl->size / vm_page_size; page_index < num_of_pages; ) { + /* + * Ignore any non-present pages at the end of the + * UPL. 
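 * (Illustrative: for a 64-page UPL whose last three pages were never
 * entered, the backward scan below stops at index 60 and
 * num_of_pages becomes 61, so the trailing absent pages are never
 * examined for dirtiness.)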
+ */ + for (page_index = upl->size / vm_page_size; page_index > 0;) + if (UPL_PAGE_PRESENT(pl, --page_index)) + break; + num_of_pages = page_index + 1; + + base_index = (upl->offset % cl_size) / PAGE_SIZE; + + for (page_index = 0; page_index < num_of_pages; ) { /* * skip over non-dirty pages */ for ( ; page_index < num_of_pages; page_index++) { - if (UPL_DIRTY_PAGE(pl, page_index) || UPL_PRECIOUS_PAGE(pl, page_index)) + if (UPL_DIRTY_PAGE(pl, page_index) + || UPL_PRECIOUS_PAGE(pl, page_index)) /* * this is a page we need to write - * go see if we can buddy it up with others - * that are contiguous to it + * go see if we can buddy it up with + * others that are contiguous to it */ break; /* - * if the page is not-dirty, but present we need to commit it... - * this is an unusual case since we only asked for dirty pages + * if the page is not-dirty, but present we + * need to commit it... This is an unusual + * case since we only asked for dirty pages */ if (UPL_PAGE_PRESENT(pl, page_index)) { boolean_t empty = FALSE; @@ -2933,10 +2993,13 @@ vs_cluster_write( vm_page_size, UPL_COMMIT_NOTIFY_EMPTY, pl, - MAX_UPL_TRANSFER, + page_list_count, &empty); - if (empty) + if (empty) { + assert(page_index == + num_of_pages - 1); upl_deallocate(upl); + } } } if (page_index == num_of_pages) @@ -2946,14 +3009,16 @@ vs_cluster_write( break; /* - * gather up contiguous dirty pages... we have at least 1 - * otherwise we would have bailed above + * gather up contiguous dirty pages... we have at + * least 1 * otherwise we would have bailed above * make sure that each physical segment that we step * into is contiguous to the one we're currently in * if it's not, we have to stop and write what we have */ - for (first_dirty = page_index; page_index < num_of_pages; ) { - if ( !UPL_DIRTY_PAGE(pl, page_index) && !UPL_PRECIOUS_PAGE(pl, page_index)) + for (first_dirty = page_index; + page_index < num_of_pages; ) { + if ( !UPL_DIRTY_PAGE(pl, page_index) + && !UPL_PRECIOUS_PAGE(pl, page_index)) break; page_index++; /* @@ -2965,57 +3030,66 @@ vs_cluster_write( int cur_seg; int nxt_seg; - cur_seg = (page_index - 1) / pages_in_cl; - nxt_seg = page_index / pages_in_cl; + cur_seg = (base_index + (page_index - 1))/pages_in_cl; + nxt_seg = (base_index + page_index)/pages_in_cl; if (cur_seg != nxt_seg) { if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg])) - /* - * if the segment we're about to step into - * is not contiguous to the one we're currently - * in, or it's in a different paging file.... - * we stop here and generate the I/O - */ + /* + * if the segment we're about + * to step into is not + * contiguous to the one we're + * currently in, or it's in a + * different paging file.... 
+ * we stop here and generate + * the I/O + */ break; } } } num_dirty = page_index - first_dirty; - must_abort = 1; if (num_dirty) { upl_offset = first_dirty * vm_page_size; - seg_index = first_dirty / pages_in_cl; - seg_offset = upl_offset - (seg_index * cl_size); transfer_size = num_dirty * vm_page_size; - error = ps_write_file(psp[seg_index], upl, upl_offset, - ps_offset[seg_index] + seg_offset, transfer_size, flags); + while (transfer_size) { - if (error == 0) { - while (transfer_size) { - int seg_size; + if ((seg_size = cl_size - + ((upl->offset + upl_offset) % cl_size)) + > transfer_size) + seg_size = transfer_size; - if ((seg_size = cl_size - (upl_offset % cl_size)) > transfer_size) - seg_size = transfer_size; + ps_vs_write_complete(vs, + upl->offset + upl_offset, + seg_size, error); - ps_vs_write_complete(vs, upl->offset + upl_offset, seg_size, error); - - transfer_size -= seg_size; - upl_offset += seg_size; - } - must_abort = 0; + transfer_size -= seg_size; + upl_offset += seg_size; } - } - if (must_abort) { + upl_offset = first_dirty * vm_page_size; + transfer_size = num_dirty * vm_page_size; + + seg_index = (base_index + first_dirty) / pages_in_cl; + seg_offset = (upl->offset + upl_offset) % cl_size; + + error = ps_write_file(psp[seg_index], + upl, upl_offset, + ps_offset[seg_index] + + seg_offset, + transfer_size, flags); + } else { boolean_t empty = FALSE; upl_abort_range(upl, first_dirty * vm_page_size, num_dirty * vm_page_size, UPL_ABORT_NOTIFY_EMPTY, &empty); - if (empty) + if (empty) { + assert(page_index == num_of_pages); upl_deallocate(upl); + } } } @@ -3047,13 +3121,13 @@ vs_cluster_write( /* Assume that the caller has given us contiguous */ /* pages */ if(cnt) { + ps_vs_write_complete(vs, mobj_target_addr, + cnt, error); error = ps_write_file(ps, internal_upl, 0, actual_offset, cnt, flags); if (error) break; - ps_vs_write_complete(vs, mobj_target_addr, - cnt, error); } if (error) break; @@ -3078,7 +3152,7 @@ ps_vstruct_allocated_size( { int num_pages; struct vs_map *vsmap; - int i, j, k; + unsigned int i, j, k; num_pages = 0; if (vs->vs_indirect) { @@ -3116,7 +3190,7 @@ ps_vstruct_allocated_size( } } - return ptoa(num_pages); + return ptoa_32(num_pages); } size_t @@ -3125,10 +3199,10 @@ ps_vstruct_allocated_pages( default_pager_page_t *pages, size_t pages_size) { - int num_pages; + unsigned int num_pages; struct vs_map *vsmap; vm_offset_t offset; - int i, j, k; + unsigned int i, j, k; num_pages = 0; offset = 0; @@ -3191,16 +3265,12 @@ kern_return_t ps_vstruct_transfer_from_segment( vstruct_t vs, paging_segment_t segment, -#ifndef ubc_sync_working - vm_object_t transfer_object) -#else upl_t upl) -#endif { struct vs_map *vsmap; - struct vs_map old_vsmap; - struct vs_map new_vsmap; - int i, j, k; +// struct vs_map old_vsmap; +// struct vs_map new_vsmap; + unsigned int i, j; VS_LOCK(vs); /* block all work on this vstruct */ /* can't allow the normal multiple write */ @@ -3223,8 +3293,8 @@ ps_vstruct_transfer_from_segment( VS_UNLOCK(vs); vs_changed: if (vs->vs_indirect) { - int vsmap_size; - int clmap_off; + unsigned int vsmap_size; + int clmap_off; /* loop on indirect maps */ for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { vsmap = vs->vs_imap[i]; @@ -3246,11 +3316,7 @@ vs_changed: (vm_page_size * (j << vs->vs_clshift)) + clmap_off, vm_page_size << vs->vs_clshift, -#ifndef ubc_sync_working - transfer_object) -#else upl) -#endif != KERN_SUCCESS) { VS_LOCK(vs); vs->vs_xfer_pending = FALSE; @@ -3292,11 +3358,7 @@ vs_changed: if(vs_cluster_transfer(vs, vm_page_size 
* (j << vs->vs_clshift), vm_page_size << vs->vs_clshift, -#ifndef ubc_sync_working - transfer_object) != KERN_SUCCESS) { -#else upl) != KERN_SUCCESS) { -#endif VS_LOCK(vs); vs->vs_xfer_pending = FALSE; VS_UNLOCK(vs); @@ -3337,7 +3399,7 @@ vs_get_map_entry( struct vs_map *vsmap; vm_offset_t cluster; - cluster = atop(offset) >> vs->vs_clshift; + cluster = atop_32(offset) >> vs->vs_clshift; if (vs->vs_indirect) { long ind_block = cluster/CLMAP_ENTRIES; @@ -3356,31 +3418,24 @@ vs_cluster_transfer( vstruct_t vs, vm_offset_t offset, vm_size_t cnt, -#ifndef ubc_sync_working - vm_object_t transfer_object) -#else upl_t upl) -#endif { vm_offset_t actual_offset; paging_segment_t ps; struct clmap clmap; kern_return_t error = KERN_SUCCESS; - int size, size_wanted, i; + unsigned int size, size_wanted; + int i; unsigned int residual; - int unavail_size; - default_pager_thread_t *dpt; - boolean_t dealloc; - struct vs_map *vsmap_ptr; + unsigned int unavail_size; +// default_pager_thread_t *dpt; +// boolean_t dealloc; + struct vs_map *vsmap_ptr = NULL; struct vs_map read_vsmap; struct vs_map original_read_vsmap; struct vs_map write_vsmap; - upl_t sync_upl; -#ifndef ubc_sync_working - upl_t upl; -#endif - - vm_offset_t ioaddr; +// upl_t sync_upl; +// vm_offset_t ioaddr; /* vs_cluster_transfer reads in the pages of a cluster and * then writes these pages back to new backing store. The @@ -3483,30 +3538,15 @@ vs_cluster_transfer( if(ps->ps_segtype == PS_PARTITION) { /* - NEED TO BE WITH SYNC & NO COMMIT + NEED TO ISSUE WITH SYNC & NO COMMIT error = ps_read_device(ps, actual_offset, &buffer, size, &residual, flags); */ } else { -#ifndef ubc_sync_working - int page_list_count = 0; - - error = vm_object_upl_request(transfer_object, -(vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)), - size, &upl, NULL, &page_list_count, - UPL_NO_SYNC | UPL_CLEAN_IN_PLACE - | UPL_SET_INTERNAL); - if (error == KERN_SUCCESS) { - error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, - size, &residual, 0); - } - -#else - /* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/ - error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, + /* NEED TO ISSUE WITH SYNC & NO COMMIT */ + error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset, size, &residual, - (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)); -#endif + (UPL_IOSYNC | UPL_NOCOMMIT)); } read_vsmap = *vsmap_ptr; @@ -3520,7 +3560,6 @@ vs_cluster_transfer( * */ if ((error == KERN_SUCCESS) && (residual == 0)) { - int page_list_count = 0; /* * Got everything we asked for, supply the data to @@ -3535,20 +3574,8 @@ vs_cluster_transfer( /* the vm_map_copy_page_discard call */ *vsmap_ptr = write_vsmap; -#ifndef ubc_sync_working - error = vm_object_upl_request(transfer_object, - (vm_object_offset_t) - (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)), - size, &upl, NULL, &page_list_count, - UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL); - if(vs_cluster_write(vs, upl, offset, - size, TRUE, 0) != KERN_SUCCESS) { - upl_commit(upl, NULL); - upl_deallocate(upl); -#else if(vs_cluster_write(vs, upl, offset, size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) { -#endif error = KERN_FAILURE; if(!(VSM_ISCLR(*vsmap_ptr))) { /* unmap the new backing store object */ @@ -3627,14 +3654,16 @@ vs_cluster_transfer( } kern_return_t -default_pager_add_file(MACH_PORT_FACE backing_store, - int *vp, +default_pager_add_file( + MACH_PORT_FACE backing_store, + vnode_ptr_t vp, int record_size, - long size) + vm_size_t size) { backing_store_t bs; 
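	/*
	 * (Illustrative sketch, not part of the commit: ps_bmap, cleared
	 * in the loop below, is a classic BSD bitmap with one bit per
	 * cluster; setbit()/clrbit()/isset() from <sys/param.h> address
	 * bit i as byte i/NBBY, bit i%NBBY.  A minimal find-first-free
	 * scan over such a map, in the spirit of ps_allocate_cluster():)
	 */
#if 0	/* sketch only; bmap_first_free() is a hypothetical helper */
	static int
	bmap_first_free(unsigned char *bmap, int ncls)
	{
		int i;

		for (i = 0; i < ncls; i++)
			if (!isset(bmap, i)) {
				setbit(bmap, i);	/* claim cluster i */
				return i;
			}
		return -1;			/* no free cluster */
	}
#endif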
paging_segment_t ps; int i; + unsigned int j; int error; if ((bs = backing_store_lookup(backing_store)) @@ -3684,20 +3713,20 @@ default_pager_add_file(MACH_PORT_FACE backing_store, PS_LOCK_INIT(ps); ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); if (!ps->ps_bmap) { - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } - for (i = 0; i < ps->ps_ncls; i++) { - clrbit(ps->ps_bmap, i); + for (j = 0; j < ps->ps_ncls; j++) { + clrbit(ps->ps_bmap, j); } ps->ps_going_away = FALSE; ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { - kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -3712,10 +3741,10 @@ default_pager_add_file(MACH_PORT_FACE backing_store, bs_more_space(ps->ps_clcount); - DEBUG(DEBUG_BS_INTERNAL, - ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", - device, offset, size, record_size, - ps->ps_record_shift, ps->ps_pgnum)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", + device, offset, size, record_size, + ps->ps_record_shift, ps->ps_pgnum)); return KERN_SUCCESS; } @@ -3726,9 +3755,9 @@ kern_return_t ps_read_file( paging_segment_t ps, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_offset_t offset, - unsigned int size, + upl_size_t size, unsigned int *residualp, int flags) { @@ -3736,8 +3765,9 @@ ps_read_file( int error = 0; int result; + assert(dp_encryption_inited); - clustered_reads[atop(size)]++; + clustered_reads[atop_32(size)]++; f_offset = (vm_object_offset_t)(ps->ps_offset + offset); @@ -3765,7 +3795,7 @@ kern_return_t ps_write_file( paging_segment_t ps, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_offset_t offset, unsigned int size, int flags) @@ -3773,11 +3803,20 @@ ps_write_file( vm_object_offset_t f_offset; kern_return_t result; - int error = 0; + assert(dp_encryption_inited); - clustered_writes[atop(size)]++; + clustered_writes[atop_32(size)]++; f_offset = (vm_object_offset_t)(ps->ps_offset + offset); + if (flags & UPL_PAGING_ENCRYPTED) { + /* + * ENCRYPTED SWAP: + * encrypt all the pages that we're going + * to pageout. 
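 * (Illustrative note: upl_encrypt() below transforms the pages in
 * place, so the vnode_pageout() that follows writes ciphertext; the
 * pagein side requests UPL_ENCRYPT when it builds its UPL, as in
 * pvs_cluster_read() above, so those pages are decrypted on the way
 * back in.)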
+ */ + upl_encrypt(upl, upl_offset, size); + } + if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL)) result = KERN_FAILURE; @@ -3788,7 +3827,7 @@ ps_write_file( } kern_return_t -default_pager_triggers(MACH_PORT_FACE default_pager, +default_pager_triggers( __unused MACH_PORT_FACE default_pager, int hi_wat, int lo_wat, int flags, @@ -3798,7 +3837,27 @@ default_pager_triggers(MACH_PORT_FACE default_pager, kern_return_t kr; PSL_LOCK(); - if (flags == HI_WAT_ALERT) { + if (flags == SWAP_ENCRYPT_ON) { + /* ENCRYPTED SWAP: turn encryption on */ + release = trigger_port; + if (!dp_encryption_inited) { + dp_encryption_inited = TRUE; + dp_encryption = TRUE; + kr = KERN_SUCCESS; + } else { + kr = KERN_FAILURE; + } + } else if (flags == SWAP_ENCRYPT_OFF) { + /* ENCRYPTED SWAP: turn encryption off */ + release = trigger_port; + if (!dp_encryption_inited) { + dp_encryption_inited = TRUE; + dp_encryption = FALSE; + kr = KERN_SUCCESS; + } else { + kr = KERN_FAILURE; + } + } else if (flags == HI_WAT_ALERT) { release = min_pages_trigger_port; min_pages_trigger_port = trigger_port; minimum_pages_remaining = hi_wat/vm_page_size; @@ -3820,3 +3879,75 @@ default_pager_triggers(MACH_PORT_FACE default_pager, return kr; } + +/* + * Monitor the amount of available backing store vs. the amount of + * required backing store, notify a listener (if present) when + * backing store may safely be removed. + * + * We attempt to avoid the situation where backing store is + * discarded en masse, as this can lead to thrashing as the + * backing store is compacted. + */ + +#define PF_INTERVAL 3 /* time between free level checks */ +#define PF_LATENCY 10 /* number of intervals before release */ + +static int dp_pages_free_low_count = 0; +thread_call_t default_pager_backing_store_monitor_callout; + +void +default_pager_backing_store_monitor(__unused thread_call_param_t p1, + __unused thread_call_param_t p2) +{ +// unsigned long long average; + ipc_port_t trigger; + uint64_t deadline; + + /* + * We determine whether it will be safe to release some + * backing store by watching the free page level. If + * it remains below the maximum_pages_free threshold for + * at least PF_LATENCY checks (taken at PF_INTERVAL seconds) + * then we deem it safe. + * + * Note that this establishes a maximum rate at which backing + * store will be released, as each notification (currently) + * only results in a single backing store object being + * released. + */ + if (dp_pages_free > maximum_pages_free) { + dp_pages_free_low_count++; + } else { + dp_pages_free_low_count = 0; + } + + /* decide whether to send notification */ + trigger = IP_NULL; + if (max_pages_trigger_port && + (backing_store_release_trigger_disable == 0) && + (dp_pages_free_low_count > PF_LATENCY)) { + trigger = max_pages_trigger_port; + max_pages_trigger_port = NULL; + } + + /* send notification */ + if (trigger != IP_NULL) { + VSL_LOCK(); + if(backing_store_release_trigger_disable != 0) { + assert_wait((event_t) + &backing_store_release_trigger_disable, + THREAD_UNINT); + VSL_UNLOCK(); + thread_block(THREAD_CONTINUE_NULL); + } else { + VSL_UNLOCK(); + } + default_pager_space_alert(trigger, LO_WAT_ALERT); + ipc_port_release_send(trigger); + dp_pages_free_low_count = 0; + } + + clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline); + thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline); +}
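
/*
 * (Illustrative sketch, not part of the commit: how the monitor
 * callout above would be armed at default-pager startup.  The
 * thread_call APIs are the same ones the monitor itself uses;
 * placing the setup in a default_pager_initialize()-style init path
 * is an assumption here, and default_pager_monitor_init() is a
 * hypothetical name.)
 */
#if 0	/* sketch only */
void
default_pager_monitor_init(void)
{
	uint64_t deadline;

	default_pager_backing_store_monitor_callout =
		thread_call_allocate(default_pager_backing_store_monitor,
				     NULL);

	/* first check fires after PF_INTERVAL seconds; the monitor
	   then re-arms itself at the bottom of each pass */
	clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
	thread_call_enter_delayed(
		default_pager_backing_store_monitor_callout, deadline);
}
#endif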